diff --git a/graph_net/paddle/utils.py b/graph_net/paddle/utils.py index e8b1ceed..a8754f61 100644 --- a/graph_net/paddle/utils.py +++ b/graph_net/paddle/utils.py @@ -180,9 +180,10 @@ def replay_tensor(info): if "data" in info and info["data"] is not None: return paddle.reshape(info["data"], shape).to(dtype).to(device) elif dtype == paddle.int32 or dtype == paddle.int64: - # for some ops(binary_cross_entropy), label data can only be set 0 or 1. return paddle.cast( - paddle.randint(low=0, high=2, shape=shape, dtype="int64"), + paddle.randint( + low=min_value, high=max_value + 1, shape=shape, dtype="int64" + ), dtype, ).to(device) elif dtype == paddle.bool: @@ -192,7 +193,6 @@ def replay_tensor(info): ).to(device) else: std = info["info"]["std"] - # return paddle.randn(shape).to(dtype).to(device) * std * 1e-3 + 1e-2 return ( paddle.uniform(shape, dtype="float32", min=min_value, max=max_value) .to(dtype) diff --git a/paddle_samples/vision-model/SwinTransformer_base_patch4_window12_384/subgraph_0/graph_hash.txt b/paddle_samples/vision-model/SwinTransformer_base_patch4_window12_384/subgraph_0/graph_hash.txt new file mode 100644 index 00000000..678a18a1 --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_base_patch4_window12_384/subgraph_0/graph_hash.txt @@ -0,0 +1 @@ +b30ca9883649d991bc8c4ba04da935a9b4b73a36613bcc8529ef9a0a2fb8b1d3 \ No newline at end of file diff --git a/paddle_samples/vision-model/SwinTransformer_base_patch4_window12_384/subgraph_0/graph_net.json b/paddle_samples/vision-model/SwinTransformer_base_patch4_window12_384/subgraph_0/graph_net.json new file mode 100644 index 00000000..368ac6fc --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_base_patch4_window12_384/subgraph_0/graph_net.json @@ -0,0 +1,5 @@ +{ + "framework": "paddle", + "num_devices_required": 1, + "num_nodes_required": 1 +} \ No newline at end of file diff --git a/paddle_samples/vision-model/SwinTransformer_base_patch4_window12_384/subgraph_0/input_meta.py b/paddle_samples/vision-model/SwinTransformer_base_patch4_window12_384/subgraph_0/input_meta.py new file mode 100644 index 00000000..0aae8dab --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_base_patch4_window12_384/subgraph_0/input_meta.py @@ -0,0 +1,439 @@ +class Program_weight_tensor_data_0: + name = "data_0" + shape = [4, 3, 384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_1: + name = "data_1" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_2: + name = "data_2" + shape = [529, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_3: + name = "data_3" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_4: + name = "data_4" + shape = [529, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_5: + name = "data_5" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_6: + name = "data_6" + shape = [529, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_7: + name = "data_7" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_8: + name = "data_8" + shape = [529, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_9: + name = "data_9" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_10: + name = "data_10" + shape = [529, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_11: + name = "data_11" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_12: + name = "data_12" + shape = [529, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_13: + name = "data_13" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_14: + name = "data_14" + shape = [529, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_15: + name = "data_15" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_16: + name = "data_16" + shape = [529, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_17: + name = "data_17" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_18: + name = "data_18" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_19: + name = "data_19" + shape = [529, 32] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_20: + name = "data_20" + shape = [529, 8] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_21: + name = "data_21" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_22: + name = "data_22" + shape = [529, 32] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_23: + name = "data_23" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_24: + name = "data_24" + shape = [529, 4] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_25: + name = "data_25" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_26: + name = "data_26" + shape = [529, 8] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_27: + name = "data_27" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_28: + name = "data_28" + shape = [529, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_29: + name = "data_29" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_30: + name = "data_30" + shape = [529, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_31: + name = "data_31" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_32: + name = "data_32" + shape = [529, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_33: + name = "data_33" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_34: + name = "data_34" + shape = [529, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_35: + name = "data_35" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_36: + name = "data_36" + shape = [529, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_37: + name = "data_37" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_38: + name = "data_38" + shape = [529, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_39: + name = "data_39" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_40: + name = "data_40" + shape = [529, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_41: + name = "data_41" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_42: + name = "data_42" + shape = [529, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_43: + name = "data_43" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_44: + name = "data_44" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_45: + name = "data_45" + shape = [529, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_46: + name = "data_46" + shape = [529, 4] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_47: + name = "data_47" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_48: + name = "data_48" + shape = [529, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None diff --git a/paddle_samples/vision-model/SwinTransformer_base_patch4_window12_384/subgraph_0/model.py b/paddle_samples/vision-model/SwinTransformer_base_patch4_window12_384/subgraph_0/model.py new file mode 100644 index 00000000..4597914c --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_base_patch4_window12_384/subgraph_0/model.py @@ -0,0 +1,10708 @@ +import paddle + + +class GraphModule(paddle.nn.Layer): + def __init__(self): + super().__init__() + + def forward( + self, + parameter_0, + parameter_1, + parameter_2, + parameter_3, + parameter_4, + parameter_5, + parameter_6, + parameter_7, + parameter_8, + parameter_9, + parameter_10, + parameter_11, + parameter_12, + parameter_13, + parameter_14, + parameter_15, + parameter_16, + parameter_17, + parameter_18, + parameter_19, + parameter_20, + parameter_21, + parameter_22, + parameter_23, + parameter_24, + parameter_25, + parameter_26, + parameter_27, + parameter_28, + parameter_29, + parameter_30, + parameter_31, + parameter_32, + parameter_33, + parameter_34, + parameter_35, + parameter_36, + parameter_37, + parameter_38, + parameter_39, + parameter_40, + parameter_41, + parameter_42, + parameter_43, + parameter_44, + parameter_45, + parameter_46, + parameter_47, + parameter_48, + parameter_49, + parameter_50, + parameter_51, + parameter_52, + parameter_53, + parameter_54, + parameter_55, + parameter_56, + parameter_57, + parameter_58, + parameter_59, + parameter_60, + parameter_61, + parameter_62, + parameter_63, + parameter_64, + parameter_65, + parameter_66, + parameter_67, + parameter_68, + parameter_69, + parameter_70, + parameter_71, + parameter_72, + parameter_73, + parameter_74, + parameter_75, + parameter_76, + parameter_77, + parameter_78, + parameter_79, + parameter_80, + parameter_81, + parameter_82, + parameter_83, + parameter_84, + parameter_85, + parameter_86, + parameter_87, + parameter_88, + parameter_89, + parameter_90, + parameter_91, + parameter_92, + parameter_93, + parameter_94, + parameter_95, + parameter_96, + parameter_97, + parameter_98, + parameter_99, + parameter_100, + parameter_101, + parameter_102, + parameter_103, + parameter_104, + parameter_105, + parameter_106, + parameter_107, + parameter_108, + parameter_109, + parameter_110, + parameter_111, + parameter_112, + parameter_113, + parameter_114, + parameter_115, + parameter_116, + parameter_117, + parameter_118, + parameter_119, + parameter_120, + parameter_121, + parameter_122, + parameter_123, + parameter_124, + parameter_125, + parameter_126, + parameter_127, + parameter_128, + parameter_129, + parameter_130, + parameter_131, + parameter_132, + parameter_133, + parameter_134, + parameter_135, + parameter_136, + parameter_137, + parameter_138, + parameter_139, + parameter_140, + parameter_141, + parameter_142, + parameter_143, + parameter_144, + parameter_145, + parameter_146, + parameter_147, + parameter_148, + parameter_149, + parameter_150, + parameter_151, + parameter_152, + parameter_153, + parameter_154, + parameter_155, + parameter_156, + parameter_157, + parameter_158, + parameter_159, + parameter_160, + parameter_161, + parameter_162, + parameter_163, + parameter_164, + parameter_165, + parameter_166, + parameter_167, + parameter_168, + parameter_169, + parameter_170, + parameter_171, + parameter_172, + parameter_173, + parameter_174, + parameter_175, + parameter_176, + parameter_177, + parameter_178, + parameter_179, + parameter_180, + parameter_181, + parameter_182, + parameter_183, + parameter_184, + parameter_185, + parameter_186, + parameter_187, + parameter_188, + parameter_189, + parameter_190, + parameter_191, + parameter_192, + parameter_193, + parameter_194, + parameter_195, + parameter_196, + parameter_197, + parameter_198, + parameter_199, + parameter_200, + parameter_201, + parameter_202, + parameter_203, + parameter_204, + parameter_205, + parameter_206, + parameter_207, + parameter_208, + parameter_209, + parameter_210, + parameter_211, + parameter_212, + parameter_213, + parameter_214, + parameter_215, + parameter_216, + parameter_217, + parameter_218, + parameter_219, + parameter_220, + parameter_221, + parameter_222, + parameter_223, + parameter_224, + parameter_225, + parameter_226, + parameter_227, + parameter_228, + parameter_229, + parameter_230, + parameter_231, + parameter_232, + parameter_233, + parameter_234, + parameter_235, + parameter_236, + parameter_237, + parameter_238, + parameter_239, + parameter_240, + parameter_241, + parameter_242, + parameter_243, + parameter_244, + parameter_245, + parameter_246, + parameter_247, + parameter_248, + parameter_249, + parameter_250, + parameter_251, + parameter_252, + parameter_253, + parameter_254, + parameter_255, + parameter_256, + parameter_257, + parameter_258, + parameter_259, + parameter_260, + parameter_261, + parameter_262, + parameter_263, + parameter_264, + parameter_265, + parameter_266, + parameter_267, + parameter_268, + parameter_269, + parameter_270, + parameter_271, + parameter_272, + parameter_273, + parameter_274, + parameter_275, + parameter_276, + parameter_277, + parameter_278, + parameter_279, + parameter_280, + parameter_281, + parameter_282, + parameter_283, + parameter_284, + parameter_285, + parameter_286, + parameter_287, + parameter_288, + parameter_289, + parameter_290, + parameter_291, + parameter_292, + parameter_293, + parameter_294, + parameter_295, + parameter_296, + parameter_297, + parameter_298, + parameter_299, + parameter_300, + parameter_301, + parameter_302, + parameter_303, + parameter_304, + data_0, + data_1, + data_2, + data_3, + data_4, + data_5, + data_6, + data_7, + data_8, + data_9, + data_10, + data_11, + data_12, + data_13, + data_14, + data_15, + data_16, + data_17, + data_18, + data_19, + data_20, + data_21, + data_22, + data_23, + data_24, + data_25, + data_26, + data_27, + data_28, + data_29, + data_30, + data_31, + data_32, + data_33, + data_34, + data_35, + data_36, + data_37, + data_38, + data_39, + data_40, + data_41, + data_42, + data_43, + data_44, + data_45, + data_46, + data_47, + data_48, + ): + # pd_op.conv2d: (4x128x96x96xf32) <- (4x3x384x384xf32, 128x3x4x4xf32) + conv2d_0 = paddle._C_ops.conv2d( + data_0, parameter_304, [4, 4], [0, 0], "EXPLICIT", [1, 1], 1, "NCHW" + ) + del data_0, parameter_304 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_9 = [1, -1, 1, 1] + + # pd_op.reshape: (1x128x1x1xf32) <- (128xf32, 4xi64) + reshape_0 = paddle._C_ops.reshape(parameter_303, full_int_array_9) + del full_int_array_9, parameter_303 + + # pd_op.add: (4x128x96x96xf32) <- (4x128x96x96xf32, 1x128x1x1xf32) + add_0 = paddle._C_ops.add(conv2d_0, reshape_0) + + # pd_op.flatten: (4x128x9216xf32) <- (4x128x96x96xf32) + flatten_1 = paddle._C_ops.flatten(add_0, 2, 3) + + # pd_op.transpose: (4x9216x128xf32) <- (4x128x9216xf32) + transpose_0 = paddle._C_ops.transpose(flatten_1, [0, 2, 1]) + del flatten_1 + + # pd_op.layer_norm: (4x9216x128xf32, 4x9216xf32, 4x9216xf32) <- (4x9216x128xf32, 128xf32, 128xf32) + layer_norm_0, layer_norm_1, layer_norm_2 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + transpose_0, parameter_302, parameter_301, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_301, parameter_302 + + # pd_op.layer_norm: (4x9216x128xf32, 4x9216xf32, 4x9216xf32) <- (4x9216x128xf32, 128xf32, 128xf32) + layer_norm_3, layer_norm_4, layer_norm_5 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + layer_norm_0, parameter_300, parameter_299, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_299, parameter_300 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_10 = [4, 96, 96, 128] + + # pd_op.reshape: (4x96x96x128xf32) <- (4x9216x128xf32, 4xi64) + reshape_1 = paddle._C_ops.reshape(layer_norm_3, full_int_array_10) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_11 = [4, 8, 12, 8, 12, 128] + + # pd_op.reshape: (4x8x12x8x12x128xf32) <- (4x96x96x128xf32, 6xi64) + reshape_211 = paddle._C_ops.reshape(reshape_1, full_int_array_11) + + # pd_op.transpose: (4x8x8x12x12x128xf32) <- (4x8x12x8x12x128xf32) + transpose_1 = paddle._C_ops.transpose(reshape_211, [0, 1, 3, 2, 4, 5]) + del reshape_211 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_12 = [-1, 12, 12, 128] + + # pd_op.reshape: (256x12x12x128xf32) <- (4x8x8x12x12x128xf32, 4xi64) + reshape_2 = paddle._C_ops.reshape(transpose_1, full_int_array_12) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_13 = [-1, 144, 128] + + # pd_op.reshape: (256x144x128xf32) <- (256x12x12x128xf32, 3xi64) + reshape_3 = paddle._C_ops.reshape(reshape_2, full_int_array_13) + + # pd_op.matmul: (256x144x384xf32) <- (256x144x128xf32, 128x384xf32) + matmul_0 = paddle._C_ops.matmul(reshape_3, parameter_298, False, False) + del parameter_298 + + # pd_op.add: (256x144x384xf32) <- (256x144x384xf32, 384xf32) + add_1 = paddle._C_ops.add(matmul_0, parameter_297) + del parameter_297 + + # pd_op.full_int_array: (5xi64) <- () + full_int_array_14 = [256, 144, 3, 4, 32] + + # pd_op.reshape: (256x144x3x4x32xf32) <- (256x144x384xf32, 5xi64) + reshape_212 = paddle._C_ops.reshape(add_1, full_int_array_14) + + # pd_op.transpose: (3x256x4x144x32xf32) <- (256x144x3x4x32xf32) + transpose_2 = paddle._C_ops.transpose(reshape_212, [2, 0, 3, 1, 4]) + del reshape_212 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_0 = [0] + + # pd_op.assign: (1xi64) <- (1xi64) + assign_264 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_257 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_254 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_247 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_231 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_224 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_221 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_214 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_211 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_204 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_201 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_194 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_191 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_184 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_181 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_174 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_171 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_164 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_161 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_154 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_151 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_144 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_141 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_134 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_131 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_124 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_121 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_114 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_111 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_104 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_101 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_94 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_91 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_84 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_81 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_74 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_71 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_64 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_61 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_54 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_38 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_31 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_28 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_21 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_10 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_3 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_2 = full_int_array_0 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_1 = [1] + + # pd_op.assign: (1xi64) <- (1xi64) + assign_259 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_258 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_249 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_248 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_226 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_225 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_216 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_215 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_206 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_205 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_196 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_195 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_186 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_185 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_176 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_175 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_166 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_165 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_156 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_155 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_146 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_145 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_136 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_135 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_126 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_125 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_116 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_115 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_106 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_105 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_96 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_95 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_86 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_85 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_76 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_75 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_66 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_65 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_56 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_55 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_33 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_32 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_23 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_22 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_5 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_4 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_0 = full_int_array_1 + + # pd_op.slice: (256x4x144x32xf32) <- (3x256x4x144x32xf32, 1xi64, 1xi64) + slice_24 = paddle._C_ops.slice( + transpose_2, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_2 = [2] + + # pd_op.assign: (1xi64) <- (1xi64) + assign_269 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_267 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_261 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_260 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_251 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_250 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_228 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_227 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_218 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_217 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_208 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_207 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_198 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_197 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_188 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_187 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_178 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_177 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_168 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_167 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_158 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_157 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_148 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_147 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_138 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_137 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_128 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_127 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_118 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_117 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_108 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_107 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_98 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_97 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_88 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_87 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_78 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_77 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_68 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_67 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_58 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_57 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_35 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_34 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_25 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_24 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_7 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_6 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_1 = full_int_array_2 + + # pd_op.slice: (256x4x144x32xf32) <- (3x256x4x144x32xf32, 1xi64, 1xi64) + slice_25 = paddle._C_ops.slice( + transpose_2, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_3 = [3] + + # pd_op.assign: (1xi64) <- (1xi64) + assign_262 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_252 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_229 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_219 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_209 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_199 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_189 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_179 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_169 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_159 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_149 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_139 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_129 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_119 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_109 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_99 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_89 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_79 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_69 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_59 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_36 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_26 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_8 = full_int_array_3 + + # pd_op.slice: (256x4x144x32xf32) <- (3x256x4x144x32xf32, 1xi64, 1xi64) + slice_0 = paddle._C_ops.slice( + transpose_2, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.full: (1xf32) <- () + full_0 = paddle._C_ops.full( + [1], float("0.176777"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.assign: (1xf32) <- (1xf32) + assign_263 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_253 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_230 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_220 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_210 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_200 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_190 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_180 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_170 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_160 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_150 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_140 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_130 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_120 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_110 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_100 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_90 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_80 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_70 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_60 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_37 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_27 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_9 = full_0 + + # pd_op.scale: (256x4x144x32xf32) <- (256x4x144x32xf32, 1xf32) + scale_0 = paddle._C_ops.scale(slice_24, full_0, float("0"), True) + del slice_24 + + # pd_op.transpose: (256x4x32x144xf32) <- (256x4x144x32xf32) + transpose_3 = paddle._C_ops.transpose(slice_25, [0, 1, 3, 2]) + del slice_25 + + # pd_op.matmul: (256x4x144x144xf32) <- (256x4x144x32xf32, 256x4x32x144xf32) + matmul_1 = paddle._C_ops.matmul(scale_0, transpose_3, False, False) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_15 = [-1] + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_4 = paddle._C_ops.reshape(data_23, full_int_array_15) + del data_23 + + # pd_op.index_select: (20736x4xf32) <- (529x4xf32, 20736xi64) + index_select_0 = paddle._C_ops.index_select(data_24, reshape_4, 0) + del data_24 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_16 = [144, 144, -1] + + # pd_op.reshape: (144x144x4xf32) <- (20736x4xf32, 3xi64) + reshape_213 = paddle._C_ops.reshape(index_select_0, full_int_array_16) + + # pd_op.transpose: (4x144x144xf32) <- (144x144x4xf32) + transpose_4 = paddle._C_ops.transpose(reshape_213, [2, 0, 1]) + del reshape_213 + + # pd_op.unsqueeze: (1x4x144x144xf32) <- (4x144x144xf32, 1xi64) + unsqueeze_0 = paddle._C_ops.unsqueeze(transpose_4, full_int_array_0) + + # pd_op.add: (256x4x144x144xf32) <- (256x4x144x144xf32, 1x4x144x144xf32) + add_170 = paddle._C_ops.add(matmul_1, unsqueeze_0) + + # pd_op.softmax: (256x4x144x144xf32) <- (256x4x144x144xf32) + softmax_0 = paddle._C_ops.softmax(add_170, -1) + del add_170 + + # pd_op.matmul: (256x4x144x32xf32) <- (256x4x144x144xf32, 256x4x144x32xf32) + matmul_124 = paddle._C_ops.matmul(softmax_0, slice_0, False, False) + + # pd_op.transpose: (256x144x4x32xf32) <- (256x4x144x32xf32) + transpose_5 = paddle._C_ops.transpose(matmul_124, [0, 2, 1, 3]) + del matmul_124 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_17 = [256, 144, 128] + + # pd_op.reshape: (256x144x128xf32) <- (256x144x4x32xf32, 3xi64) + reshape_5 = paddle._C_ops.reshape(transpose_5, full_int_array_17) + + # pd_op.matmul: (256x144x128xf32) <- (256x144x128xf32, 128x128xf32) + matmul_2 = paddle._C_ops.matmul(reshape_5, parameter_296, False, False) + del parameter_296 + + # pd_op.add: (256x144x128xf32) <- (256x144x128xf32, 128xf32) + add_2 = paddle._C_ops.add(matmul_2, parameter_295) + del parameter_295 + + # pd_op.reshape: (256x12x12x128xf32) <- (256x144x128xf32, 4xi64) + reshape_6 = paddle._C_ops.reshape(add_2, full_int_array_12) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_18 = [-1, 8, 8, 12, 12, 128] + + # pd_op.reshape: (4x8x8x12x12x128xf32) <- (256x12x12x128xf32, 6xi64) + reshape_214 = paddle._C_ops.reshape(reshape_6, full_int_array_18) + + # pd_op.transpose: (4x8x12x8x12x128xf32) <- (4x8x8x12x12x128xf32) + transpose_6 = paddle._C_ops.transpose(reshape_214, [0, 1, 3, 2, 4, 5]) + del reshape_214 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_19 = [-1, 96, 96, 128] + + # pd_op.reshape: (4x96x96x128xf32) <- (4x8x12x8x12x128xf32, 4xi64) + reshape_7 = paddle._C_ops.reshape(transpose_6, full_int_array_19) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_20 = [4, 9216, 128] + + # pd_op.reshape: (4x9216x128xf32) <- (4x96x96x128xf32, 3xi64) + reshape_8 = paddle._C_ops.reshape(reshape_7, full_int_array_20) + + # pd_op.add: (4x9216x128xf32) <- (4x9216x128xf32, 4x9216x128xf32) + add_3 = paddle._C_ops.add(layer_norm_0, reshape_8) + + # pd_op.layer_norm: (4x9216x128xf32, 4x9216xf32, 4x9216xf32) <- (4x9216x128xf32, 128xf32, 128xf32) + layer_norm_6, layer_norm_7, layer_norm_8 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_3, parameter_294, parameter_293, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_293, parameter_294 + + # pd_op.matmul: (4x9216x512xf32) <- (4x9216x128xf32, 128x512xf32) + matmul_3 = paddle._C_ops.matmul(layer_norm_6, parameter_292, False, False) + del parameter_292 + + # pd_op.add: (4x9216x512xf32) <- (4x9216x512xf32, 512xf32) + add_4 = paddle._C_ops.add(matmul_3, parameter_291) + del parameter_291 + + # pd_op.gelu: (4x9216x512xf32) <- (4x9216x512xf32) + gelu_0 = paddle._C_ops.gelu(add_4, False) + + # pd_op.matmul: (4x9216x128xf32) <- (4x9216x512xf32, 512x128xf32) + matmul_4 = paddle._C_ops.matmul(gelu_0, parameter_290, False, False) + del parameter_290 + + # pd_op.add: (4x9216x128xf32) <- (4x9216x128xf32, 128xf32) + add_5 = paddle._C_ops.add(matmul_4, parameter_289) + del parameter_289 + + # pd_op.add: (4x9216x128xf32) <- (4x9216x128xf32, 4x9216x128xf32) + add_6 = paddle._C_ops.add(add_3, add_5) + + # pd_op.layer_norm: (4x9216x128xf32, 4x9216xf32, 4x9216xf32) <- (4x9216x128xf32, 128xf32, 128xf32) + layer_norm_9, layer_norm_10, layer_norm_11 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_6, parameter_288, parameter_287, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_287, parameter_288 + + # pd_op.reshape: (4x96x96x128xf32) <- (4x9216x128xf32, 4xi64) + reshape_9 = paddle._C_ops.reshape(layer_norm_9, full_int_array_10) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_4 = [-6, -6] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_256 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_223 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_203 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_183 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_163 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_143 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_123 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_103 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_83 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_63 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_30 = full_int_array_4 + + # pd_op.roll: (4x96x96x128xf32) <- (4x96x96x128xf32, 2xi64) + roll_0 = paddle._C_ops.roll(reshape_9, full_int_array_4, [1, 2]) + + # pd_op.reshape: (4x8x12x8x12x128xf32) <- (4x96x96x128xf32, 6xi64) + reshape_215 = paddle._C_ops.reshape(roll_0, full_int_array_11) + del full_int_array_11 + + # pd_op.transpose: (4x8x8x12x12x128xf32) <- (4x8x12x8x12x128xf32) + transpose_7 = paddle._C_ops.transpose(reshape_215, [0, 1, 3, 2, 4, 5]) + del reshape_215 + + # pd_op.reshape: (256x12x12x128xf32) <- (4x8x8x12x12x128xf32, 4xi64) + reshape_10 = paddle._C_ops.reshape(transpose_7, full_int_array_12) + + # pd_op.reshape: (256x144x128xf32) <- (256x12x12x128xf32, 3xi64) + reshape_11 = paddle._C_ops.reshape(reshape_10, full_int_array_13) + del full_int_array_13 + + # pd_op.full: (1x96x96x1xf32) <- () + full_25 = paddle._C_ops.full( + [1, 96, 96, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_21 = [0, 0] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_234 = full_int_array_21 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_41 = full_int_array_21 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_12 = full_int_array_21 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_22 = [-12, -12] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_23 = [1, 1] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_268 = full_int_array_23 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_243 = full_int_array_23 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_50 = full_int_array_23 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_18 = full_int_array_23 + + # pd_op.set_value_: (1x96x96x1xf32) <- (1x96x96x1xf32, 2xi64, 2xi64, 2xi64) + set_value__12 = paddle._C_ops.set_value_( + full_25, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_25 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_24 = [0, -12] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_25 = [-12, -6] + + # pd_op.set_value_: (1x96x96x1xf32) <- (1x96x96x1xf32, 2xi64, 2xi64, 2xi64) + set_value__13 = paddle._C_ops.set_value_( + set_value__12, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__12 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_26 = [0, -6] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_27 = [-12, 2147483647] + + # pd_op.set_value_: (1x96x96x1xf32) <- (1x96x96x1xf32, 2xi64, 2xi64, 2xi64) + set_value__14 = paddle._C_ops.set_value_( + set_value__13, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__13 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_28 = [-12, 0] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_29 = [-6, -12] + + # pd_op.set_value_: (1x96x96x1xf32) <- (1x96x96x1xf32, 2xi64, 2xi64, 2xi64) + set_value__15 = paddle._C_ops.set_value_( + set_value__14, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__14 + + # pd_op.set_value_: (1x96x96x1xf32) <- (1x96x96x1xf32, 2xi64, 2xi64, 2xi64) + set_value__16 = paddle._C_ops.set_value_( + set_value__15, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__15 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_30 = [-6, 2147483647] + + # pd_op.set_value_: (1x96x96x1xf32) <- (1x96x96x1xf32, 2xi64, 2xi64, 2xi64) + set_value__17 = paddle._C_ops.set_value_( + set_value__16, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__16 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_31 = [-6, 0] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_32 = [2147483647, -12] + + # pd_op.set_value_: (1x96x96x1xf32) <- (1x96x96x1xf32, 2xi64, 2xi64, 2xi64) + set_value__18 = paddle._C_ops.set_value_( + set_value__17, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__17 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_33 = [2147483647, -6] + + # pd_op.set_value_: (1x96x96x1xf32) <- (1x96x96x1xf32, 2xi64, 2xi64, 2xi64) + set_value__19 = paddle._C_ops.set_value_( + set_value__18, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__18 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_34 = [2147483647, 2147483647] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_244 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_241 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_238 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_235 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_51 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_48 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_45 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_42 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_19 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_16 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_14 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_13 = full_int_array_34 + + # pd_op.set_value_: (1x96x96x1xf32) <- (1x96x96x1xf32, 2xi64, 2xi64, 2xi64) + set_value__0 = paddle._C_ops.set_value_( + set_value__19, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__19 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_35 = [1, 8, 12, 8, 12, 1] + + # pd_op.reshape: (1x8x12x8x12x1xf32) <- (1x96x96x1xf32, 6xi64) + reshape_216 = paddle._C_ops.reshape(set_value__0, full_int_array_35) + del full_int_array_35 + + # pd_op.transpose: (1x8x8x12x12x1xf32) <- (1x8x12x8x12x1xf32) + transpose_146 = paddle._C_ops.transpose(reshape_216, [0, 1, 3, 2, 4, 5]) + del reshape_216 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_36 = [-1, 12, 12, 1] + + # pd_op.reshape: (64x12x12x1xf32) <- (1x8x8x12x12x1xf32, 4xi64) + reshape_217 = paddle._C_ops.reshape(transpose_146, full_int_array_36) + del transpose_146 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_37 = [-1, 144] + + # pd_op.reshape: (64x144xf32) <- (64x12x12x1xf32, 2xi64) + reshape_218 = paddle._C_ops.reshape(reshape_217, full_int_array_37) + del reshape_217 + + # pd_op.unsqueeze: (64x1x144xf32) <- (64x144xf32, 1xi64) + unsqueeze_37 = paddle._C_ops.unsqueeze(reshape_218, full_int_array_1) + + # pd_op.unsqueeze: (64x144x1xf32) <- (64x144xf32, 1xi64) + unsqueeze_38 = paddle._C_ops.unsqueeze(reshape_218, full_int_array_2) + del reshape_218 + + # pd_op.subtract: (64x144x144xf32) <- (64x1x144xf32, 64x144x1xf32) + subtract_0 = paddle._C_ops.subtract(unsqueeze_37, unsqueeze_38) + del unsqueeze_37, unsqueeze_38 + + # pd_op.full: (xf32) <- () + full_26 = paddle._C_ops.full( + [], float("0"), paddle.float32, paddle.framework._current_expected_place() + ) + + # pd_op.not_equal: (64x144x144xb) <- (64x144x144xf32, xf32) + not_equal_0 = paddle._C_ops.not_equal(subtract_0, full_26) + + # pd_op.full: (64x144x144xf32) <- () + full_27 = paddle._C_ops.full( + [64, 144, 144], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (64x144x144xf32) <- (64x144x144xb, 64x144x144xf32, 64x144x144xf32) + where_0 = paddle._C_ops.where(not_equal_0, full_27, subtract_0) + del full_27, not_equal_0, subtract_0 + + # pd_op.equal: (64x144x144xb) <- (64x144x144xf32, xf32) + equal_0 = paddle._C_ops.equal(where_0, full_26) + + # pd_op.full: (64x144x144xf32) <- () + full_28 = paddle._C_ops.full( + [64, 144, 144], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (64x144x144xf32) <- (64x144x144xb, 64x144x144xf32, 64x144x144xf32) + where_1 = paddle._C_ops.where(equal_0, full_28, where_0) + del equal_0, full_28, where_0 + + # pd_op.matmul: (256x144x384xf32) <- (256x144x128xf32, 128x384xf32) + matmul_5 = paddle._C_ops.matmul(reshape_11, parameter_286, False, False) + del parameter_286 + + # pd_op.add: (256x144x384xf32) <- (256x144x384xf32, 384xf32) + add_7 = paddle._C_ops.add(matmul_5, parameter_285) + del parameter_285 + + # pd_op.reshape: (256x144x3x4x32xf32) <- (256x144x384xf32, 5xi64) + reshape_219 = paddle._C_ops.reshape(add_7, full_int_array_14) + del full_int_array_14 + + # pd_op.transpose: (3x256x4x144x32xf32) <- (256x144x3x4x32xf32) + transpose_8 = paddle._C_ops.transpose(reshape_219, [2, 0, 3, 1, 4]) + del reshape_219 + + # pd_op.slice: (256x4x144x32xf32) <- (3x256x4x144x32xf32, 1xi64, 1xi64) + slice_26 = paddle._C_ops.slice( + transpose_8, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (256x4x144x32xf32) <- (3x256x4x144x32xf32, 1xi64, 1xi64) + slice_27 = paddle._C_ops.slice( + transpose_8, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (256x4x144x32xf32) <- (3x256x4x144x32xf32, 1xi64, 1xi64) + slice_1 = paddle._C_ops.slice( + transpose_8, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (256x4x144x32xf32) <- (256x4x144x32xf32, 1xf32) + scale_1 = paddle._C_ops.scale(slice_26, full_0, float("0"), True) + del slice_26 + + # pd_op.transpose: (256x4x32x144xf32) <- (256x4x144x32xf32) + transpose_9 = paddle._C_ops.transpose(slice_27, [0, 1, 3, 2]) + del slice_27 + + # pd_op.matmul: (256x4x144x144xf32) <- (256x4x144x32xf32, 256x4x32x144xf32) + matmul_6 = paddle._C_ops.matmul(scale_1, transpose_9, False, False) + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_12 = paddle._C_ops.reshape(data_43, full_int_array_15) + del data_43 + + # pd_op.index_select: (20736x4xf32) <- (529x4xf32, 20736xi64) + index_select_1 = paddle._C_ops.index_select(data_46, reshape_12, 0) + del data_46 + + # pd_op.reshape: (144x144x4xf32) <- (20736x4xf32, 3xi64) + reshape_220 = paddle._C_ops.reshape(index_select_1, full_int_array_16) + + # pd_op.transpose: (4x144x144xf32) <- (144x144x4xf32) + transpose_10 = paddle._C_ops.transpose(reshape_220, [2, 0, 1]) + del reshape_220 + + # pd_op.unsqueeze: (1x4x144x144xf32) <- (4x144x144xf32, 1xi64) + unsqueeze_1 = paddle._C_ops.unsqueeze(transpose_10, full_int_array_0) + + # pd_op.add: (256x4x144x144xf32) <- (256x4x144x144xf32, 1x4x144x144xf32) + add_8 = paddle._C_ops.add(matmul_6, unsqueeze_1) + + # pd_op.full_int_array: (5xi64) <- () + full_int_array_38 = [4, 64, 4, 144, 144] + + # pd_op.reshape: (4x64x4x144x144xf32) <- (256x4x144x144xf32, 5xi64) + reshape_13 = paddle._C_ops.reshape(add_8, full_int_array_38) + del full_int_array_38 + + # pd_op.unsqueeze: (64x1x144x144xf32) <- (64x144x144xf32, 1xi64) + unsqueeze_39 = paddle._C_ops.unsqueeze(where_1, full_int_array_1) + del where_1 + + # pd_op.unsqueeze: (1x64x1x144x144xf32) <- (64x1x144x144xf32, 1xi64) + unsqueeze_2 = paddle._C_ops.unsqueeze(unsqueeze_39, full_int_array_0) + del unsqueeze_39 + + # pd_op.add: (4x64x4x144x144xf32) <- (4x64x4x144x144xf32, 1x64x1x144x144xf32) + add_9 = paddle._C_ops.add(reshape_13, unsqueeze_2) + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_39 = [256, 4, 144, 144] + + # pd_op.reshape: (256x4x144x144xf32) <- (4x64x4x144x144xf32, 4xi64) + reshape_221 = paddle._C_ops.reshape(add_9, full_int_array_39) + del full_int_array_39 + + # pd_op.softmax: (256x4x144x144xf32) <- (256x4x144x144xf32) + softmax_1 = paddle._C_ops.softmax(reshape_221, -1) + del reshape_221 + + # pd_op.matmul: (256x4x144x32xf32) <- (256x4x144x144xf32, 256x4x144x32xf32) + matmul_125 = paddle._C_ops.matmul(softmax_1, slice_1, False, False) + + # pd_op.transpose: (256x144x4x32xf32) <- (256x4x144x32xf32) + transpose_11 = paddle._C_ops.transpose(matmul_125, [0, 2, 1, 3]) + del matmul_125 + + # pd_op.reshape: (256x144x128xf32) <- (256x144x4x32xf32, 3xi64) + reshape_14 = paddle._C_ops.reshape(transpose_11, full_int_array_17) + del full_int_array_17 + + # pd_op.matmul: (256x144x128xf32) <- (256x144x128xf32, 128x128xf32) + matmul_7 = paddle._C_ops.matmul(reshape_14, parameter_284, False, False) + del parameter_284 + + # pd_op.add: (256x144x128xf32) <- (256x144x128xf32, 128xf32) + add_10 = paddle._C_ops.add(matmul_7, parameter_283) + del parameter_283 + + # pd_op.reshape: (256x12x12x128xf32) <- (256x144x128xf32, 4xi64) + reshape_15 = paddle._C_ops.reshape(add_10, full_int_array_12) + del full_int_array_12 + + # pd_op.reshape: (4x8x8x12x12x128xf32) <- (256x12x12x128xf32, 6xi64) + reshape_222 = paddle._C_ops.reshape(reshape_15, full_int_array_18) + del full_int_array_18 + + # pd_op.transpose: (4x8x12x8x12x128xf32) <- (4x8x8x12x12x128xf32) + transpose_12 = paddle._C_ops.transpose(reshape_222, [0, 1, 3, 2, 4, 5]) + del reshape_222 + + # pd_op.reshape: (4x96x96x128xf32) <- (4x8x12x8x12x128xf32, 4xi64) + reshape_16 = paddle._C_ops.reshape(transpose_12, full_int_array_19) + del full_int_array_19 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_5 = [6, 6] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_265 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_232 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_212 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_192 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_172 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_152 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_132 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_112 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_92 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_72 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_39 = full_int_array_5 + + # pd_op.roll: (4x96x96x128xf32) <- (4x96x96x128xf32, 2xi64) + roll_1 = paddle._C_ops.roll(reshape_16, full_int_array_5, [1, 2]) + + # pd_op.reshape: (4x9216x128xf32) <- (4x96x96x128xf32, 3xi64) + reshape_17 = paddle._C_ops.reshape(roll_1, full_int_array_20) + del full_int_array_20 + + # pd_op.full: (xf32) <- () + full_1 = paddle._C_ops.full( + [], + float("0.978261"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_11 = full_1 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_40 = [4, 1, 1] + + # pd_op.full: (1xf32) <- () + full_29 = paddle._C_ops.full( + [1], float("0"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.full: (1xf32) <- () + full_30 = paddle._C_ops.full( + [1], float("1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_0 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_171 = paddle._C_ops.add(full_1, uniform_0) + del uniform_0 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_0 = paddle._C_ops.floor(add_171) + del add_171 + + # pd_op.divide: (4x9216x128xf32) <- (4x9216x128xf32, xf32) + divide_0 = paddle._C_ops.divide(reshape_17, full_1) + + # pd_op.multiply: (4x9216x128xf32) <- (4x9216x128xf32, 4x1x1xf32) + multiply_0 = paddle._C_ops.multiply(divide_0, floor_0) + + # pd_op.add: (4x9216x128xf32) <- (4x9216x128xf32, 4x9216x128xf32) + add_11 = paddle._C_ops.add(add_6, multiply_0) + + # pd_op.layer_norm: (4x9216x128xf32, 4x9216xf32, 4x9216xf32) <- (4x9216x128xf32, 128xf32, 128xf32) + layer_norm_12, layer_norm_13, layer_norm_14 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_11, parameter_282, parameter_281, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_281, parameter_282 + + # pd_op.matmul: (4x9216x512xf32) <- (4x9216x128xf32, 128x512xf32) + matmul_8 = paddle._C_ops.matmul(layer_norm_12, parameter_280, False, False) + del parameter_280 + + # pd_op.add: (4x9216x512xf32) <- (4x9216x512xf32, 512xf32) + add_12 = paddle._C_ops.add(matmul_8, parameter_279) + del parameter_279 + + # pd_op.gelu: (4x9216x512xf32) <- (4x9216x512xf32) + gelu_1 = paddle._C_ops.gelu(add_12, False) + + # pd_op.matmul: (4x9216x128xf32) <- (4x9216x512xf32, 512x128xf32) + matmul_9 = paddle._C_ops.matmul(gelu_1, parameter_278, False, False) + del parameter_278 + + # pd_op.add: (4x9216x128xf32) <- (4x9216x128xf32, 128xf32) + add_13 = paddle._C_ops.add(matmul_9, parameter_277) + del parameter_277 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_1 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_172 = paddle._C_ops.add(full_1, uniform_1) + del uniform_1 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_1 = paddle._C_ops.floor(add_172) + del add_172 + + # pd_op.divide: (4x9216x128xf32) <- (4x9216x128xf32, xf32) + divide_1 = paddle._C_ops.divide(add_13, full_1) + + # pd_op.multiply: (4x9216x128xf32) <- (4x9216x128xf32, 4x1x1xf32) + multiply_1 = paddle._C_ops.multiply(divide_1, floor_1) + + # pd_op.add: (4x9216x128xf32) <- (4x9216x128xf32, 4x9216x128xf32) + add_14 = paddle._C_ops.add(add_11, multiply_1) + + # pd_op.reshape: (4x96x96x128xf32) <- (4x9216x128xf32, 4xi64) + reshape_18 = paddle._C_ops.reshape(add_14, full_int_array_10) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_6 = [2, 2] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_245 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_242 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_239 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_236 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_52 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_49 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_46 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_43 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_20 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_17 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_15 = full_int_array_6 + + # pd_op.strided_slice: (4x48x48x128xf32) <- (4x96x96x128xf32, 2xi64, 2xi64, 2xi64) + strided_slice_0 = paddle._C_ops.strided_slice( + reshape_18, [1, 2], full_int_array_21, full_int_array_34, full_int_array_6 + ) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_7 = [1, 0] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_237 = full_int_array_7 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_44 = full_int_array_7 + + # pd_op.strided_slice: (4x48x48x128xf32) <- (4x96x96x128xf32, 2xi64, 2xi64, 2xi64) + strided_slice_1 = paddle._C_ops.strided_slice( + reshape_18, [1, 2], full_int_array_7, full_int_array_34, full_int_array_6 + ) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_8 = [0, 1] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_240 = full_int_array_8 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_47 = full_int_array_8 + + # pd_op.strided_slice: (4x48x48x128xf32) <- (4x96x96x128xf32, 2xi64, 2xi64, 2xi64) + strided_slice_2 = paddle._C_ops.strided_slice( + reshape_18, [1, 2], full_int_array_8, full_int_array_34, full_int_array_6 + ) + + # pd_op.strided_slice: (4x48x48x128xf32) <- (4x96x96x128xf32, 2xi64, 2xi64, 2xi64) + strided_slice_3 = paddle._C_ops.strided_slice( + reshape_18, [1, 2], full_int_array_23, full_int_array_34, full_int_array_6 + ) + + # pd_op.reshape: (4x96x96x128xf32) <- (4x96x96x128xf32, 4xi64) + reshape_223 = paddle._C_ops.reshape(reshape_18, full_int_array_10) + del full_int_array_10 + + # pd_op.full: (1xi32) <- () + full_2 = paddle._C_ops.full( + [1], float("-1"), paddle.int32, paddle.core.CPUPlace() + ) + + # pd_op.assign: (1xi32) <- (1xi32) + assign_246 = full_2 + + # pd_op.assign: (1xi32) <- (1xi32) + assign_53 = full_2 + + # builtin.combine: ([4x48x48x128xf32, 4x48x48x128xf32, 4x48x48x128xf32, 4x48x48x128xf32]) <- (4x48x48x128xf32, 4x48x48x128xf32, 4x48x48x128xf32, 4x48x48x128xf32) + combine_0 = [strided_slice_0, strided_slice_1, strided_slice_2, strided_slice_3] + + # pd_op.concat: (4x48x48x512xf32) <- ([4x48x48x128xf32, 4x48x48x128xf32, 4x48x48x128xf32, 4x48x48x128xf32], 1xi32) + concat_0 = paddle._C_ops.concat(combine_0, full_2) + del combine_0 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_41 = [4, -1, 512] + + # pd_op.reshape: (4x2304x512xf32) <- (4x48x48x512xf32, 3xi64) + reshape_19 = paddle._C_ops.reshape(concat_0, full_int_array_41) + del full_int_array_41 + + # pd_op.layer_norm: (4x2304x512xf32, 4x2304xf32, 4x2304xf32) <- (4x2304x512xf32, 512xf32, 512xf32) + layer_norm_15, layer_norm_16, layer_norm_17 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + reshape_19, parameter_276, parameter_275, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_275, parameter_276 + + # pd_op.matmul: (4x2304x256xf32) <- (4x2304x512xf32, 512x256xf32) + matmul_10 = paddle._C_ops.matmul(layer_norm_15, parameter_274, False, False) + del parameter_274 + + # pd_op.layer_norm: (4x2304x256xf32, 4x2304xf32, 4x2304xf32) <- (4x2304x256xf32, 256xf32, 256xf32) + layer_norm_18, layer_norm_19, layer_norm_20 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + matmul_10, parameter_273, parameter_272, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_272, parameter_273 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_42 = [4, 48, 48, 256] + + # pd_op.reshape: (4x48x48x256xf32) <- (4x2304x256xf32, 4xi64) + reshape_20 = paddle._C_ops.reshape(layer_norm_18, full_int_array_42) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_43 = [4, 4, 12, 4, 12, 256] + + # pd_op.reshape: (4x4x12x4x12x256xf32) <- (4x48x48x256xf32, 6xi64) + reshape_224 = paddle._C_ops.reshape(reshape_20, full_int_array_43) + + # pd_op.transpose: (4x4x4x12x12x256xf32) <- (4x4x12x4x12x256xf32) + transpose_13 = paddle._C_ops.transpose(reshape_224, [0, 1, 3, 2, 4, 5]) + del reshape_224 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_44 = [-1, 12, 12, 256] + + # pd_op.reshape: (64x12x12x256xf32) <- (4x4x4x12x12x256xf32, 4xi64) + reshape_21 = paddle._C_ops.reshape(transpose_13, full_int_array_44) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_45 = [-1, 144, 256] + + # pd_op.reshape: (64x144x256xf32) <- (64x12x12x256xf32, 3xi64) + reshape_22 = paddle._C_ops.reshape(reshape_21, full_int_array_45) + + # pd_op.matmul: (64x144x768xf32) <- (64x144x256xf32, 256x768xf32) + matmul_11 = paddle._C_ops.matmul(reshape_22, parameter_271, False, False) + del parameter_271 + + # pd_op.add: (64x144x768xf32) <- (64x144x768xf32, 768xf32) + add_15 = paddle._C_ops.add(matmul_11, parameter_270) + del parameter_270 + + # pd_op.full_int_array: (5xi64) <- () + full_int_array_46 = [64, 144, 3, 8, 32] + + # pd_op.reshape: (64x144x3x8x32xf32) <- (64x144x768xf32, 5xi64) + reshape_225 = paddle._C_ops.reshape(add_15, full_int_array_46) + + # pd_op.transpose: (3x64x8x144x32xf32) <- (64x144x3x8x32xf32) + transpose_14 = paddle._C_ops.transpose(reshape_225, [2, 0, 3, 1, 4]) + del reshape_225 + + # pd_op.slice: (64x8x144x32xf32) <- (3x64x8x144x32xf32, 1xi64, 1xi64) + slice_28 = paddle._C_ops.slice( + transpose_14, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (64x8x144x32xf32) <- (3x64x8x144x32xf32, 1xi64, 1xi64) + slice_29 = paddle._C_ops.slice( + transpose_14, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (64x8x144x32xf32) <- (3x64x8x144x32xf32, 1xi64, 1xi64) + slice_2 = paddle._C_ops.slice( + transpose_14, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (64x8x144x32xf32) <- (64x8x144x32xf32, 1xf32) + scale_2 = paddle._C_ops.scale(slice_28, full_0, float("0"), True) + del slice_28 + + # pd_op.transpose: (64x8x32x144xf32) <- (64x8x144x32xf32) + transpose_15 = paddle._C_ops.transpose(slice_29, [0, 1, 3, 2]) + del slice_29 + + # pd_op.matmul: (64x8x144x144xf32) <- (64x8x144x32xf32, 64x8x32x144xf32) + matmul_12 = paddle._C_ops.matmul(scale_2, transpose_15, False, False) + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_23 = paddle._C_ops.reshape(data_17, full_int_array_15) + del data_17 + + # pd_op.index_select: (20736x8xf32) <- (529x8xf32, 20736xi64) + index_select_2 = paddle._C_ops.index_select(data_20, reshape_23, 0) + del data_20 + + # pd_op.reshape: (144x144x8xf32) <- (20736x8xf32, 3xi64) + reshape_226 = paddle._C_ops.reshape(index_select_2, full_int_array_16) + + # pd_op.transpose: (8x144x144xf32) <- (144x144x8xf32) + transpose_16 = paddle._C_ops.transpose(reshape_226, [2, 0, 1]) + del reshape_226 + + # pd_op.unsqueeze: (1x8x144x144xf32) <- (8x144x144xf32, 1xi64) + unsqueeze_3 = paddle._C_ops.unsqueeze(transpose_16, full_int_array_0) + + # pd_op.add: (64x8x144x144xf32) <- (64x8x144x144xf32, 1x8x144x144xf32) + add_173 = paddle._C_ops.add(matmul_12, unsqueeze_3) + + # pd_op.softmax: (64x8x144x144xf32) <- (64x8x144x144xf32) + softmax_2 = paddle._C_ops.softmax(add_173, -1) + del add_173 + + # pd_op.matmul: (64x8x144x32xf32) <- (64x8x144x144xf32, 64x8x144x32xf32) + matmul_126 = paddle._C_ops.matmul(softmax_2, slice_2, False, False) + + # pd_op.transpose: (64x144x8x32xf32) <- (64x8x144x32xf32) + transpose_17 = paddle._C_ops.transpose(matmul_126, [0, 2, 1, 3]) + del matmul_126 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_47 = [64, 144, 256] + + # pd_op.reshape: (64x144x256xf32) <- (64x144x8x32xf32, 3xi64) + reshape_24 = paddle._C_ops.reshape(transpose_17, full_int_array_47) + + # pd_op.matmul: (64x144x256xf32) <- (64x144x256xf32, 256x256xf32) + matmul_13 = paddle._C_ops.matmul(reshape_24, parameter_269, False, False) + del parameter_269 + + # pd_op.add: (64x144x256xf32) <- (64x144x256xf32, 256xf32) + add_16 = paddle._C_ops.add(matmul_13, parameter_268) + del parameter_268 + + # pd_op.reshape: (64x12x12x256xf32) <- (64x144x256xf32, 4xi64) + reshape_25 = paddle._C_ops.reshape(add_16, full_int_array_44) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_48 = [-1, 4, 4, 12, 12, 256] + + # pd_op.reshape: (4x4x4x12x12x256xf32) <- (64x12x12x256xf32, 6xi64) + reshape_227 = paddle._C_ops.reshape(reshape_25, full_int_array_48) + + # pd_op.transpose: (4x4x12x4x12x256xf32) <- (4x4x4x12x12x256xf32) + transpose_18 = paddle._C_ops.transpose(reshape_227, [0, 1, 3, 2, 4, 5]) + del reshape_227 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_49 = [-1, 48, 48, 256] + + # pd_op.reshape: (4x48x48x256xf32) <- (4x4x12x4x12x256xf32, 4xi64) + reshape_26 = paddle._C_ops.reshape(transpose_18, full_int_array_49) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_50 = [4, 2304, 256] + + # pd_op.reshape: (4x2304x256xf32) <- (4x48x48x256xf32, 3xi64) + reshape_27 = paddle._C_ops.reshape(reshape_26, full_int_array_50) + + # pd_op.full: (xf32) <- () + full_3 = paddle._C_ops.full( + [], + float("0.956522"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_29 = full_3 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_2 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_174 = paddle._C_ops.add(full_3, uniform_2) + del uniform_2 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_2 = paddle._C_ops.floor(add_174) + del add_174 + + # pd_op.divide: (4x2304x256xf32) <- (4x2304x256xf32, xf32) + divide_2 = paddle._C_ops.divide(reshape_27, full_3) + + # pd_op.multiply: (4x2304x256xf32) <- (4x2304x256xf32, 4x1x1xf32) + multiply_2 = paddle._C_ops.multiply(divide_2, floor_2) + + # pd_op.add: (4x2304x256xf32) <- (4x2304x256xf32, 4x2304x256xf32) + add_17 = paddle._C_ops.add(matmul_10, multiply_2) + + # pd_op.layer_norm: (4x2304x256xf32, 4x2304xf32, 4x2304xf32) <- (4x2304x256xf32, 256xf32, 256xf32) + layer_norm_21, layer_norm_22, layer_norm_23 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_17, parameter_267, parameter_266, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_266, parameter_267 + + # pd_op.matmul: (4x2304x1024xf32) <- (4x2304x256xf32, 256x1024xf32) + matmul_14 = paddle._C_ops.matmul(layer_norm_21, parameter_265, False, False) + del parameter_265 + + # pd_op.add: (4x2304x1024xf32) <- (4x2304x1024xf32, 1024xf32) + add_18 = paddle._C_ops.add(matmul_14, parameter_264) + del parameter_264 + + # pd_op.gelu: (4x2304x1024xf32) <- (4x2304x1024xf32) + gelu_2 = paddle._C_ops.gelu(add_18, False) + + # pd_op.matmul: (4x2304x256xf32) <- (4x2304x1024xf32, 1024x256xf32) + matmul_15 = paddle._C_ops.matmul(gelu_2, parameter_263, False, False) + del parameter_263 + + # pd_op.add: (4x2304x256xf32) <- (4x2304x256xf32, 256xf32) + add_19 = paddle._C_ops.add(matmul_15, parameter_262) + del parameter_262 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_3 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_175 = paddle._C_ops.add(full_3, uniform_3) + del uniform_3 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_3 = paddle._C_ops.floor(add_175) + del add_175 + + # pd_op.divide: (4x2304x256xf32) <- (4x2304x256xf32, xf32) + divide_3 = paddle._C_ops.divide(add_19, full_3) + + # pd_op.multiply: (4x2304x256xf32) <- (4x2304x256xf32, 4x1x1xf32) + multiply_3 = paddle._C_ops.multiply(divide_3, floor_3) + + # pd_op.add: (4x2304x256xf32) <- (4x2304x256xf32, 4x2304x256xf32) + add_20 = paddle._C_ops.add(add_17, multiply_3) + + # pd_op.layer_norm: (4x2304x256xf32, 4x2304xf32, 4x2304xf32) <- (4x2304x256xf32, 256xf32, 256xf32) + layer_norm_24, layer_norm_25, layer_norm_26 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_20, parameter_261, parameter_260, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_260, parameter_261 + + # pd_op.reshape: (4x48x48x256xf32) <- (4x2304x256xf32, 4xi64) + reshape_28 = paddle._C_ops.reshape(layer_norm_24, full_int_array_42) + + # pd_op.roll: (4x48x48x256xf32) <- (4x48x48x256xf32, 2xi64) + roll_2 = paddle._C_ops.roll(reshape_28, full_int_array_4, [1, 2]) + + # pd_op.reshape: (4x4x12x4x12x256xf32) <- (4x48x48x256xf32, 6xi64) + reshape_228 = paddle._C_ops.reshape(roll_2, full_int_array_43) + del full_int_array_43 + + # pd_op.transpose: (4x4x4x12x12x256xf32) <- (4x4x12x4x12x256xf32) + transpose_19 = paddle._C_ops.transpose(reshape_228, [0, 1, 3, 2, 4, 5]) + del reshape_228 + + # pd_op.reshape: (64x12x12x256xf32) <- (4x4x4x12x12x256xf32, 4xi64) + reshape_29 = paddle._C_ops.reshape(transpose_19, full_int_array_44) + + # pd_op.reshape: (64x144x256xf32) <- (64x12x12x256xf32, 3xi64) + reshape_30 = paddle._C_ops.reshape(reshape_29, full_int_array_45) + del full_int_array_45 + + # pd_op.full: (1x48x48x1xf32) <- () + full_31 = paddle._C_ops.full( + [1, 48, 48, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x48x48x1xf32) <- (1x48x48x1xf32, 2xi64, 2xi64, 2xi64) + set_value__20 = paddle._C_ops.set_value_( + full_31, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_31 + + # pd_op.set_value_: (1x48x48x1xf32) <- (1x48x48x1xf32, 2xi64, 2xi64, 2xi64) + set_value__21 = paddle._C_ops.set_value_( + set_value__20, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__20 + + # pd_op.set_value_: (1x48x48x1xf32) <- (1x48x48x1xf32, 2xi64, 2xi64, 2xi64) + set_value__22 = paddle._C_ops.set_value_( + set_value__21, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__21 + + # pd_op.set_value_: (1x48x48x1xf32) <- (1x48x48x1xf32, 2xi64, 2xi64, 2xi64) + set_value__23 = paddle._C_ops.set_value_( + set_value__22, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__22 + + # pd_op.set_value_: (1x48x48x1xf32) <- (1x48x48x1xf32, 2xi64, 2xi64, 2xi64) + set_value__24 = paddle._C_ops.set_value_( + set_value__23, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__23 + + # pd_op.set_value_: (1x48x48x1xf32) <- (1x48x48x1xf32, 2xi64, 2xi64, 2xi64) + set_value__25 = paddle._C_ops.set_value_( + set_value__24, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__24 + + # pd_op.set_value_: (1x48x48x1xf32) <- (1x48x48x1xf32, 2xi64, 2xi64, 2xi64) + set_value__26 = paddle._C_ops.set_value_( + set_value__25, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__25 + + # pd_op.set_value_: (1x48x48x1xf32) <- (1x48x48x1xf32, 2xi64, 2xi64, 2xi64) + set_value__27 = paddle._C_ops.set_value_( + set_value__26, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__26 + + # pd_op.set_value_: (1x48x48x1xf32) <- (1x48x48x1xf32, 2xi64, 2xi64, 2xi64) + set_value__1 = paddle._C_ops.set_value_( + set_value__27, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__27 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_51 = [1, 4, 12, 4, 12, 1] + + # pd_op.reshape: (1x4x12x4x12x1xf32) <- (1x48x48x1xf32, 6xi64) + reshape_229 = paddle._C_ops.reshape(set_value__1, full_int_array_51) + del full_int_array_51 + + # pd_op.transpose: (1x4x4x12x12x1xf32) <- (1x4x12x4x12x1xf32) + transpose_147 = paddle._C_ops.transpose(reshape_229, [0, 1, 3, 2, 4, 5]) + del reshape_229 + + # pd_op.reshape: (16x12x12x1xf32) <- (1x4x4x12x12x1xf32, 4xi64) + reshape_230 = paddle._C_ops.reshape(transpose_147, full_int_array_36) + del transpose_147 + + # pd_op.reshape: (16x144xf32) <- (16x12x12x1xf32, 2xi64) + reshape_231 = paddle._C_ops.reshape(reshape_230, full_int_array_37) + del reshape_230 + + # pd_op.unsqueeze: (16x1x144xf32) <- (16x144xf32, 1xi64) + unsqueeze_40 = paddle._C_ops.unsqueeze(reshape_231, full_int_array_1) + + # pd_op.unsqueeze: (16x144x1xf32) <- (16x144xf32, 1xi64) + unsqueeze_41 = paddle._C_ops.unsqueeze(reshape_231, full_int_array_2) + del reshape_231 + + # pd_op.subtract: (16x144x144xf32) <- (16x1x144xf32, 16x144x1xf32) + subtract_1 = paddle._C_ops.subtract(unsqueeze_40, unsqueeze_41) + del unsqueeze_40, unsqueeze_41 + + # pd_op.not_equal: (16x144x144xb) <- (16x144x144xf32, xf32) + not_equal_1 = paddle._C_ops.not_equal(subtract_1, full_26) + + # pd_op.full: (16x144x144xf32) <- () + full_32 = paddle._C_ops.full( + [16, 144, 144], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (16x144x144xf32) <- (16x144x144xb, 16x144x144xf32, 16x144x144xf32) + where_2 = paddle._C_ops.where(not_equal_1, full_32, subtract_1) + del full_32, not_equal_1, subtract_1 + + # pd_op.equal: (16x144x144xb) <- (16x144x144xf32, xf32) + equal_1 = paddle._C_ops.equal(where_2, full_26) + + # pd_op.full: (16x144x144xf32) <- () + full_33 = paddle._C_ops.full( + [16, 144, 144], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (16x144x144xf32) <- (16x144x144xb, 16x144x144xf32, 16x144x144xf32) + where_3 = paddle._C_ops.where(equal_1, full_33, where_2) + del equal_1, full_33, where_2 + + # pd_op.matmul: (64x144x768xf32) <- (64x144x256xf32, 256x768xf32) + matmul_16 = paddle._C_ops.matmul(reshape_30, parameter_259, False, False) + del parameter_259 + + # pd_op.add: (64x144x768xf32) <- (64x144x768xf32, 768xf32) + add_21 = paddle._C_ops.add(matmul_16, parameter_258) + del parameter_258 + + # pd_op.reshape: (64x144x3x8x32xf32) <- (64x144x768xf32, 5xi64) + reshape_232 = paddle._C_ops.reshape(add_21, full_int_array_46) + del full_int_array_46 + + # pd_op.transpose: (3x64x8x144x32xf32) <- (64x144x3x8x32xf32) + transpose_20 = paddle._C_ops.transpose(reshape_232, [2, 0, 3, 1, 4]) + del reshape_232 + + # pd_op.slice: (64x8x144x32xf32) <- (3x64x8x144x32xf32, 1xi64, 1xi64) + slice_30 = paddle._C_ops.slice( + transpose_20, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (64x8x144x32xf32) <- (3x64x8x144x32xf32, 1xi64, 1xi64) + slice_31 = paddle._C_ops.slice( + transpose_20, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (64x8x144x32xf32) <- (3x64x8x144x32xf32, 1xi64, 1xi64) + slice_3 = paddle._C_ops.slice( + transpose_20, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (64x8x144x32xf32) <- (64x8x144x32xf32, 1xf32) + scale_3 = paddle._C_ops.scale(slice_30, full_0, float("0"), True) + del slice_30 + + # pd_op.transpose: (64x8x32x144xf32) <- (64x8x144x32xf32) + transpose_21 = paddle._C_ops.transpose(slice_31, [0, 1, 3, 2]) + del slice_31 + + # pd_op.matmul: (64x8x144x144xf32) <- (64x8x144x32xf32, 64x8x32x144xf32) + matmul_17 = paddle._C_ops.matmul(scale_3, transpose_21, False, False) + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_31 = paddle._C_ops.reshape(data_25, full_int_array_15) + del data_25 + + # pd_op.index_select: (20736x8xf32) <- (529x8xf32, 20736xi64) + index_select_3 = paddle._C_ops.index_select(data_26, reshape_31, 0) + del data_26 + + # pd_op.reshape: (144x144x8xf32) <- (20736x8xf32, 3xi64) + reshape_233 = paddle._C_ops.reshape(index_select_3, full_int_array_16) + + # pd_op.transpose: (8x144x144xf32) <- (144x144x8xf32) + transpose_22 = paddle._C_ops.transpose(reshape_233, [2, 0, 1]) + del reshape_233 + + # pd_op.unsqueeze: (1x8x144x144xf32) <- (8x144x144xf32, 1xi64) + unsqueeze_4 = paddle._C_ops.unsqueeze(transpose_22, full_int_array_0) + + # pd_op.add: (64x8x144x144xf32) <- (64x8x144x144xf32, 1x8x144x144xf32) + add_22 = paddle._C_ops.add(matmul_17, unsqueeze_4) + + # pd_op.full_int_array: (5xi64) <- () + full_int_array_52 = [4, 16, 8, 144, 144] + + # pd_op.reshape: (4x16x8x144x144xf32) <- (64x8x144x144xf32, 5xi64) + reshape_32 = paddle._C_ops.reshape(add_22, full_int_array_52) + del full_int_array_52 + + # pd_op.unsqueeze: (16x1x144x144xf32) <- (16x144x144xf32, 1xi64) + unsqueeze_42 = paddle._C_ops.unsqueeze(where_3, full_int_array_1) + del where_3 + + # pd_op.unsqueeze: (1x16x1x144x144xf32) <- (16x1x144x144xf32, 1xi64) + unsqueeze_5 = paddle._C_ops.unsqueeze(unsqueeze_42, full_int_array_0) + del unsqueeze_42 + + # pd_op.add: (4x16x8x144x144xf32) <- (4x16x8x144x144xf32, 1x16x1x144x144xf32) + add_23 = paddle._C_ops.add(reshape_32, unsqueeze_5) + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_53 = [64, 8, 144, 144] + + # pd_op.reshape: (64x8x144x144xf32) <- (4x16x8x144x144xf32, 4xi64) + reshape_234 = paddle._C_ops.reshape(add_23, full_int_array_53) + del full_int_array_53 + + # pd_op.softmax: (64x8x144x144xf32) <- (64x8x144x144xf32) + softmax_3 = paddle._C_ops.softmax(reshape_234, -1) + del reshape_234 + + # pd_op.matmul: (64x8x144x32xf32) <- (64x8x144x144xf32, 64x8x144x32xf32) + matmul_127 = paddle._C_ops.matmul(softmax_3, slice_3, False, False) + + # pd_op.transpose: (64x144x8x32xf32) <- (64x8x144x32xf32) + transpose_23 = paddle._C_ops.transpose(matmul_127, [0, 2, 1, 3]) + del matmul_127 + + # pd_op.reshape: (64x144x256xf32) <- (64x144x8x32xf32, 3xi64) + reshape_33 = paddle._C_ops.reshape(transpose_23, full_int_array_47) + del full_int_array_47 + + # pd_op.matmul: (64x144x256xf32) <- (64x144x256xf32, 256x256xf32) + matmul_18 = paddle._C_ops.matmul(reshape_33, parameter_257, False, False) + del parameter_257 + + # pd_op.add: (64x144x256xf32) <- (64x144x256xf32, 256xf32) + add_24 = paddle._C_ops.add(matmul_18, parameter_256) + del parameter_256 + + # pd_op.reshape: (64x12x12x256xf32) <- (64x144x256xf32, 4xi64) + reshape_34 = paddle._C_ops.reshape(add_24, full_int_array_44) + del full_int_array_44 + + # pd_op.reshape: (4x4x4x12x12x256xf32) <- (64x12x12x256xf32, 6xi64) + reshape_235 = paddle._C_ops.reshape(reshape_34, full_int_array_48) + del full_int_array_48 + + # pd_op.transpose: (4x4x12x4x12x256xf32) <- (4x4x4x12x12x256xf32) + transpose_24 = paddle._C_ops.transpose(reshape_235, [0, 1, 3, 2, 4, 5]) + del reshape_235 + + # pd_op.reshape: (4x48x48x256xf32) <- (4x4x12x4x12x256xf32, 4xi64) + reshape_35 = paddle._C_ops.reshape(transpose_24, full_int_array_49) + del full_int_array_49 + + # pd_op.roll: (4x48x48x256xf32) <- (4x48x48x256xf32, 2xi64) + roll_3 = paddle._C_ops.roll(reshape_35, full_int_array_5, [1, 2]) + + # pd_op.reshape: (4x2304x256xf32) <- (4x48x48x256xf32, 3xi64) + reshape_36 = paddle._C_ops.reshape(roll_3, full_int_array_50) + del full_int_array_50 + + # pd_op.full: (xf32) <- () + full_4 = paddle._C_ops.full( + [], + float("0.934783"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_40 = full_4 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_4 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_176 = paddle._C_ops.add(full_4, uniform_4) + del uniform_4 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_4 = paddle._C_ops.floor(add_176) + del add_176 + + # pd_op.divide: (4x2304x256xf32) <- (4x2304x256xf32, xf32) + divide_4 = paddle._C_ops.divide(reshape_36, full_4) + + # pd_op.multiply: (4x2304x256xf32) <- (4x2304x256xf32, 4x1x1xf32) + multiply_4 = paddle._C_ops.multiply(divide_4, floor_4) + + # pd_op.add: (4x2304x256xf32) <- (4x2304x256xf32, 4x2304x256xf32) + add_25 = paddle._C_ops.add(add_20, multiply_4) + + # pd_op.layer_norm: (4x2304x256xf32, 4x2304xf32, 4x2304xf32) <- (4x2304x256xf32, 256xf32, 256xf32) + layer_norm_27, layer_norm_28, layer_norm_29 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_25, parameter_255, parameter_254, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_254, parameter_255 + + # pd_op.matmul: (4x2304x1024xf32) <- (4x2304x256xf32, 256x1024xf32) + matmul_19 = paddle._C_ops.matmul(layer_norm_27, parameter_253, False, False) + del parameter_253 + + # pd_op.add: (4x2304x1024xf32) <- (4x2304x1024xf32, 1024xf32) + add_26 = paddle._C_ops.add(matmul_19, parameter_252) + del parameter_252 + + # pd_op.gelu: (4x2304x1024xf32) <- (4x2304x1024xf32) + gelu_3 = paddle._C_ops.gelu(add_26, False) + + # pd_op.matmul: (4x2304x256xf32) <- (4x2304x1024xf32, 1024x256xf32) + matmul_20 = paddle._C_ops.matmul(gelu_3, parameter_251, False, False) + del parameter_251 + + # pd_op.add: (4x2304x256xf32) <- (4x2304x256xf32, 256xf32) + add_27 = paddle._C_ops.add(matmul_20, parameter_250) + del parameter_250 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_5 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_177 = paddle._C_ops.add(full_4, uniform_5) + del uniform_5 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_5 = paddle._C_ops.floor(add_177) + del add_177 + + # pd_op.divide: (4x2304x256xf32) <- (4x2304x256xf32, xf32) + divide_5 = paddle._C_ops.divide(add_27, full_4) + + # pd_op.multiply: (4x2304x256xf32) <- (4x2304x256xf32, 4x1x1xf32) + multiply_5 = paddle._C_ops.multiply(divide_5, floor_5) + + # pd_op.add: (4x2304x256xf32) <- (4x2304x256xf32, 4x2304x256xf32) + add_28 = paddle._C_ops.add(add_25, multiply_5) + + # pd_op.reshape: (4x48x48x256xf32) <- (4x2304x256xf32, 4xi64) + reshape_37 = paddle._C_ops.reshape(add_28, full_int_array_42) + + # pd_op.strided_slice: (4x24x24x256xf32) <- (4x48x48x256xf32, 2xi64, 2xi64, 2xi64) + strided_slice_4 = paddle._C_ops.strided_slice( + reshape_37, [1, 2], full_int_array_21, full_int_array_34, full_int_array_6 + ) + + # pd_op.strided_slice: (4x24x24x256xf32) <- (4x48x48x256xf32, 2xi64, 2xi64, 2xi64) + strided_slice_5 = paddle._C_ops.strided_slice( + reshape_37, [1, 2], full_int_array_7, full_int_array_34, full_int_array_6 + ) + + # pd_op.strided_slice: (4x24x24x256xf32) <- (4x48x48x256xf32, 2xi64, 2xi64, 2xi64) + strided_slice_6 = paddle._C_ops.strided_slice( + reshape_37, [1, 2], full_int_array_8, full_int_array_34, full_int_array_6 + ) + + # pd_op.strided_slice: (4x24x24x256xf32) <- (4x48x48x256xf32, 2xi64, 2xi64, 2xi64) + strided_slice_7 = paddle._C_ops.strided_slice( + reshape_37, [1, 2], full_int_array_23, full_int_array_34, full_int_array_6 + ) + + # pd_op.reshape: (4x48x48x256xf32) <- (4x48x48x256xf32, 4xi64) + reshape_236 = paddle._C_ops.reshape(reshape_37, full_int_array_42) + del full_int_array_42 + + # builtin.combine: ([4x24x24x256xf32, 4x24x24x256xf32, 4x24x24x256xf32, 4x24x24x256xf32]) <- (4x24x24x256xf32, 4x24x24x256xf32, 4x24x24x256xf32, 4x24x24x256xf32) + combine_1 = [strided_slice_4, strided_slice_5, strided_slice_6, strided_slice_7] + + # pd_op.concat: (4x24x24x1024xf32) <- ([4x24x24x256xf32, 4x24x24x256xf32, 4x24x24x256xf32, 4x24x24x256xf32], 1xi32) + concat_1 = paddle._C_ops.concat(combine_1, full_2) + del combine_1 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_54 = [4, -1, 1024] + + # pd_op.reshape: (4x576x1024xf32) <- (4x24x24x1024xf32, 3xi64) + reshape_38 = paddle._C_ops.reshape(concat_1, full_int_array_54) + del full_int_array_54 + + # pd_op.layer_norm: (4x576x1024xf32, 4x576xf32, 4x576xf32) <- (4x576x1024xf32, 1024xf32, 1024xf32) + layer_norm_30, layer_norm_31, layer_norm_32 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + reshape_38, parameter_249, parameter_248, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_248, parameter_249 + + # pd_op.matmul: (4x576x512xf32) <- (4x576x1024xf32, 1024x512xf32) + matmul_21 = paddle._C_ops.matmul(layer_norm_30, parameter_247, False, False) + del parameter_247 + + # pd_op.layer_norm: (4x576x512xf32, 4x576xf32, 4x576xf32) <- (4x576x512xf32, 512xf32, 512xf32) + layer_norm_33, layer_norm_34, layer_norm_35 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + matmul_21, parameter_246, parameter_245, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_245, parameter_246 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_55 = [4, 24, 24, 512] + + # pd_op.reshape: (4x24x24x512xf32) <- (4x576x512xf32, 4xi64) + reshape_39 = paddle._C_ops.reshape(layer_norm_33, full_int_array_55) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_56 = [4, 2, 12, 2, 12, 512] + + # pd_op.reshape: (4x2x12x2x12x512xf32) <- (4x24x24x512xf32, 6xi64) + reshape_237 = paddle._C_ops.reshape(reshape_39, full_int_array_56) + + # pd_op.transpose: (4x2x2x12x12x512xf32) <- (4x2x12x2x12x512xf32) + transpose_25 = paddle._C_ops.transpose(reshape_237, [0, 1, 3, 2, 4, 5]) + del reshape_237 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_57 = [-1, 12, 12, 512] + + # pd_op.reshape: (16x12x12x512xf32) <- (4x2x2x12x12x512xf32, 4xi64) + reshape_40 = paddle._C_ops.reshape(transpose_25, full_int_array_57) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_58 = [-1, 144, 512] + + # pd_op.reshape: (16x144x512xf32) <- (16x12x12x512xf32, 3xi64) + reshape_41 = paddle._C_ops.reshape(reshape_40, full_int_array_58) + + # pd_op.matmul: (16x144x1536xf32) <- (16x144x512xf32, 512x1536xf32) + matmul_22 = paddle._C_ops.matmul(reshape_41, parameter_244, False, False) + del parameter_244 + + # pd_op.add: (16x144x1536xf32) <- (16x144x1536xf32, 1536xf32) + add_29 = paddle._C_ops.add(matmul_22, parameter_243) + del parameter_243 + + # pd_op.full_int_array: (5xi64) <- () + full_int_array_59 = [16, 144, 3, 16, 32] + + # pd_op.reshape: (16x144x3x16x32xf32) <- (16x144x1536xf32, 5xi64) + reshape_238 = paddle._C_ops.reshape(add_29, full_int_array_59) + + # pd_op.transpose: (3x16x16x144x32xf32) <- (16x144x3x16x32xf32) + transpose_26 = paddle._C_ops.transpose(reshape_238, [2, 0, 3, 1, 4]) + del reshape_238 + + # pd_op.slice: (16x16x144x32xf32) <- (3x16x16x144x32xf32, 1xi64, 1xi64) + slice_32 = paddle._C_ops.slice( + transpose_26, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (16x16x144x32xf32) <- (3x16x16x144x32xf32, 1xi64, 1xi64) + slice_33 = paddle._C_ops.slice( + transpose_26, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (16x16x144x32xf32) <- (3x16x16x144x32xf32, 1xi64, 1xi64) + slice_4 = paddle._C_ops.slice( + transpose_26, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (16x16x144x32xf32) <- (16x16x144x32xf32, 1xf32) + scale_4 = paddle._C_ops.scale(slice_32, full_0, float("0"), True) + del slice_32 + + # pd_op.transpose: (16x16x32x144xf32) <- (16x16x144x32xf32) + transpose_27 = paddle._C_ops.transpose(slice_33, [0, 1, 3, 2]) + del slice_33 + + # pd_op.matmul: (16x16x144x144xf32) <- (16x16x144x32xf32, 16x16x32x144xf32) + matmul_23 = paddle._C_ops.matmul(scale_4, transpose_27, False, False) + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_42 = paddle._C_ops.reshape(data_27, full_int_array_15) + del data_27 + + # pd_op.index_select: (20736x16xf32) <- (529x16xf32, 20736xi64) + index_select_4 = paddle._C_ops.index_select(data_28, reshape_42, 0) + del data_28 + + # pd_op.reshape: (144x144x16xf32) <- (20736x16xf32, 3xi64) + reshape_239 = paddle._C_ops.reshape(index_select_4, full_int_array_16) + + # pd_op.transpose: (16x144x144xf32) <- (144x144x16xf32) + transpose_28 = paddle._C_ops.transpose(reshape_239, [2, 0, 1]) + del reshape_239 + + # pd_op.unsqueeze: (1x16x144x144xf32) <- (16x144x144xf32, 1xi64) + unsqueeze_6 = paddle._C_ops.unsqueeze(transpose_28, full_int_array_0) + + # pd_op.add: (16x16x144x144xf32) <- (16x16x144x144xf32, 1x16x144x144xf32) + add_178 = paddle._C_ops.add(matmul_23, unsqueeze_6) + + # pd_op.softmax: (16x16x144x144xf32) <- (16x16x144x144xf32) + softmax_4 = paddle._C_ops.softmax(add_178, -1) + del add_178 + + # pd_op.matmul: (16x16x144x32xf32) <- (16x16x144x144xf32, 16x16x144x32xf32) + matmul_128 = paddle._C_ops.matmul(softmax_4, slice_4, False, False) + + # pd_op.transpose: (16x144x16x32xf32) <- (16x16x144x32xf32) + transpose_29 = paddle._C_ops.transpose(matmul_128, [0, 2, 1, 3]) + del matmul_128 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_60 = [16, 144, 512] + + # pd_op.reshape: (16x144x512xf32) <- (16x144x16x32xf32, 3xi64) + reshape_43 = paddle._C_ops.reshape(transpose_29, full_int_array_60) + + # pd_op.matmul: (16x144x512xf32) <- (16x144x512xf32, 512x512xf32) + matmul_24 = paddle._C_ops.matmul(reshape_43, parameter_242, False, False) + del parameter_242 + + # pd_op.add: (16x144x512xf32) <- (16x144x512xf32, 512xf32) + add_30 = paddle._C_ops.add(matmul_24, parameter_241) + del parameter_241 + + # pd_op.reshape: (16x12x12x512xf32) <- (16x144x512xf32, 4xi64) + reshape_44 = paddle._C_ops.reshape(add_30, full_int_array_57) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_61 = [-1, 2, 2, 12, 12, 512] + + # pd_op.reshape: (4x2x2x12x12x512xf32) <- (16x12x12x512xf32, 6xi64) + reshape_240 = paddle._C_ops.reshape(reshape_44, full_int_array_61) + + # pd_op.transpose: (4x2x12x2x12x512xf32) <- (4x2x2x12x12x512xf32) + transpose_30 = paddle._C_ops.transpose(reshape_240, [0, 1, 3, 2, 4, 5]) + del reshape_240 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_62 = [-1, 24, 24, 512] + + # pd_op.reshape: (4x24x24x512xf32) <- (4x2x12x2x12x512xf32, 4xi64) + reshape_45 = paddle._C_ops.reshape(transpose_30, full_int_array_62) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_63 = [4, 576, 512] + + # pd_op.reshape: (4x576x512xf32) <- (4x24x24x512xf32, 3xi64) + reshape_46 = paddle._C_ops.reshape(reshape_45, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_5 = paddle._C_ops.full( + [], + float("0.913043"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_62 = full_5 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_6 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_179 = paddle._C_ops.add(full_5, uniform_6) + del uniform_6 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_6 = paddle._C_ops.floor(add_179) + del add_179 + + # pd_op.divide: (4x576x512xf32) <- (4x576x512xf32, xf32) + divide_6 = paddle._C_ops.divide(reshape_46, full_5) + + # pd_op.multiply: (4x576x512xf32) <- (4x576x512xf32, 4x1x1xf32) + multiply_6 = paddle._C_ops.multiply(divide_6, floor_6) + + # pd_op.add: (4x576x512xf32) <- (4x576x512xf32, 4x576x512xf32) + add_31 = paddle._C_ops.add(matmul_21, multiply_6) + + # pd_op.layer_norm: (4x576x512xf32, 4x576xf32, 4x576xf32) <- (4x576x512xf32, 512xf32, 512xf32) + layer_norm_36, layer_norm_37, layer_norm_38 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_31, parameter_240, parameter_239, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_239, parameter_240 + + # pd_op.matmul: (4x576x2048xf32) <- (4x576x512xf32, 512x2048xf32) + matmul_25 = paddle._C_ops.matmul(layer_norm_36, parameter_238, False, False) + del parameter_238 + + # pd_op.add: (4x576x2048xf32) <- (4x576x2048xf32, 2048xf32) + add_32 = paddle._C_ops.add(matmul_25, parameter_237) + del parameter_237 + + # pd_op.gelu: (4x576x2048xf32) <- (4x576x2048xf32) + gelu_4 = paddle._C_ops.gelu(add_32, False) + + # pd_op.matmul: (4x576x512xf32) <- (4x576x2048xf32, 2048x512xf32) + matmul_26 = paddle._C_ops.matmul(gelu_4, parameter_236, False, False) + del parameter_236 + + # pd_op.add: (4x576x512xf32) <- (4x576x512xf32, 512xf32) + add_33 = paddle._C_ops.add(matmul_26, parameter_235) + del parameter_235 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_7 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_180 = paddle._C_ops.add(full_5, uniform_7) + del uniform_7 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_7 = paddle._C_ops.floor(add_180) + del add_180 + + # pd_op.divide: (4x576x512xf32) <- (4x576x512xf32, xf32) + divide_7 = paddle._C_ops.divide(add_33, full_5) + + # pd_op.multiply: (4x576x512xf32) <- (4x576x512xf32, 4x1x1xf32) + multiply_7 = paddle._C_ops.multiply(divide_7, floor_7) + + # pd_op.add: (4x576x512xf32) <- (4x576x512xf32, 4x576x512xf32) + add_34 = paddle._C_ops.add(add_31, multiply_7) + + # pd_op.layer_norm: (4x576x512xf32, 4x576xf32, 4x576xf32) <- (4x576x512xf32, 512xf32, 512xf32) + layer_norm_39, layer_norm_40, layer_norm_41 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_34, parameter_234, parameter_233, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_233, parameter_234 + + # pd_op.reshape: (4x24x24x512xf32) <- (4x576x512xf32, 4xi64) + reshape_47 = paddle._C_ops.reshape(layer_norm_39, full_int_array_55) + + # pd_op.roll: (4x24x24x512xf32) <- (4x24x24x512xf32, 2xi64) + roll_4 = paddle._C_ops.roll(reshape_47, full_int_array_4, [1, 2]) + + # pd_op.reshape: (4x2x12x2x12x512xf32) <- (4x24x24x512xf32, 6xi64) + reshape_241 = paddle._C_ops.reshape(roll_4, full_int_array_56) + + # pd_op.transpose: (4x2x2x12x12x512xf32) <- (4x2x12x2x12x512xf32) + transpose_31 = paddle._C_ops.transpose(reshape_241, [0, 1, 3, 2, 4, 5]) + del reshape_241 + + # pd_op.reshape: (16x12x12x512xf32) <- (4x2x2x12x12x512xf32, 4xi64) + reshape_48 = paddle._C_ops.reshape(transpose_31, full_int_array_57) + + # pd_op.reshape: (16x144x512xf32) <- (16x12x12x512xf32, 3xi64) + reshape_49 = paddle._C_ops.reshape(reshape_48, full_int_array_58) + + # pd_op.full: (1x24x24x1xf32) <- () + full_34 = paddle._C_ops.full( + [1, 24, 24, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__28 = paddle._C_ops.set_value_( + full_34, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_34 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__29 = paddle._C_ops.set_value_( + set_value__28, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__28 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__30 = paddle._C_ops.set_value_( + set_value__29, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__29 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__31 = paddle._C_ops.set_value_( + set_value__30, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__30 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__32 = paddle._C_ops.set_value_( + set_value__31, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__31 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__33 = paddle._C_ops.set_value_( + set_value__32, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__32 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__34 = paddle._C_ops.set_value_( + set_value__33, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__33 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__35 = paddle._C_ops.set_value_( + set_value__34, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__34 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__2 = paddle._C_ops.set_value_( + set_value__35, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__35 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_64 = [1, 2, 12, 2, 12, 1] + + # pd_op.reshape: (1x2x12x2x12x1xf32) <- (1x24x24x1xf32, 6xi64) + reshape_242 = paddle._C_ops.reshape(set_value__2, full_int_array_64) + + # pd_op.transpose: (1x2x2x12x12x1xf32) <- (1x2x12x2x12x1xf32) + transpose_148 = paddle._C_ops.transpose(reshape_242, [0, 1, 3, 2, 4, 5]) + del reshape_242 + + # pd_op.reshape: (4x12x12x1xf32) <- (1x2x2x12x12x1xf32, 4xi64) + reshape_243 = paddle._C_ops.reshape(transpose_148, full_int_array_36) + del transpose_148 + + # pd_op.reshape: (4x144xf32) <- (4x12x12x1xf32, 2xi64) + reshape_244 = paddle._C_ops.reshape(reshape_243, full_int_array_37) + del reshape_243 + + # pd_op.unsqueeze: (4x1x144xf32) <- (4x144xf32, 1xi64) + unsqueeze_43 = paddle._C_ops.unsqueeze(reshape_244, full_int_array_1) + + # pd_op.unsqueeze: (4x144x1xf32) <- (4x144xf32, 1xi64) + unsqueeze_44 = paddle._C_ops.unsqueeze(reshape_244, full_int_array_2) + del reshape_244 + + # pd_op.subtract: (4x144x144xf32) <- (4x1x144xf32, 4x144x1xf32) + subtract_2 = paddle._C_ops.subtract(unsqueeze_43, unsqueeze_44) + del unsqueeze_43, unsqueeze_44 + + # pd_op.not_equal: (4x144x144xb) <- (4x144x144xf32, xf32) + not_equal_2 = paddle._C_ops.not_equal(subtract_2, full_26) + + # pd_op.full: (4x144x144xf32) <- () + full_35 = paddle._C_ops.full( + [4, 144, 144], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_4 = paddle._C_ops.where(not_equal_2, full_35, subtract_2) + del not_equal_2, subtract_2 + + # pd_op.equal: (4x144x144xb) <- (4x144x144xf32, xf32) + equal_2 = paddle._C_ops.equal(where_4, full_26) + + # pd_op.full: (4x144x144xf32) <- () + full_36 = paddle._C_ops.full( + [4, 144, 144], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_5 = paddle._C_ops.where(equal_2, full_36, where_4) + del equal_2, where_4 + + # pd_op.matmul: (16x144x1536xf32) <- (16x144x512xf32, 512x1536xf32) + matmul_27 = paddle._C_ops.matmul(reshape_49, parameter_232, False, False) + del parameter_232 + + # pd_op.add: (16x144x1536xf32) <- (16x144x1536xf32, 1536xf32) + add_35 = paddle._C_ops.add(matmul_27, parameter_231) + del parameter_231 + + # pd_op.reshape: (16x144x3x16x32xf32) <- (16x144x1536xf32, 5xi64) + reshape_245 = paddle._C_ops.reshape(add_35, full_int_array_59) + + # pd_op.transpose: (3x16x16x144x32xf32) <- (16x144x3x16x32xf32) + transpose_32 = paddle._C_ops.transpose(reshape_245, [2, 0, 3, 1, 4]) + del reshape_245 + + # pd_op.slice: (16x16x144x32xf32) <- (3x16x16x144x32xf32, 1xi64, 1xi64) + slice_34 = paddle._C_ops.slice( + transpose_32, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (16x16x144x32xf32) <- (3x16x16x144x32xf32, 1xi64, 1xi64) + slice_35 = paddle._C_ops.slice( + transpose_32, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (16x16x144x32xf32) <- (3x16x16x144x32xf32, 1xi64, 1xi64) + slice_5 = paddle._C_ops.slice( + transpose_32, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (16x16x144x32xf32) <- (16x16x144x32xf32, 1xf32) + scale_5 = paddle._C_ops.scale(slice_34, full_0, float("0"), True) + del slice_34 + + # pd_op.transpose: (16x16x32x144xf32) <- (16x16x144x32xf32) + transpose_33 = paddle._C_ops.transpose(slice_35, [0, 1, 3, 2]) + del slice_35 + + # pd_op.matmul: (16x16x144x144xf32) <- (16x16x144x32xf32, 16x16x32x144xf32) + matmul_28 = paddle._C_ops.matmul(scale_5, transpose_33, False, False) + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_50 = paddle._C_ops.reshape(data_29, full_int_array_15) + del data_29 + + # pd_op.index_select: (20736x16xf32) <- (529x16xf32, 20736xi64) + index_select_5 = paddle._C_ops.index_select(data_30, reshape_50, 0) + del data_30 + + # pd_op.reshape: (144x144x16xf32) <- (20736x16xf32, 3xi64) + reshape_246 = paddle._C_ops.reshape(index_select_5, full_int_array_16) + + # pd_op.transpose: (16x144x144xf32) <- (144x144x16xf32) + transpose_34 = paddle._C_ops.transpose(reshape_246, [2, 0, 1]) + del reshape_246 + + # pd_op.unsqueeze: (1x16x144x144xf32) <- (16x144x144xf32, 1xi64) + unsqueeze_7 = paddle._C_ops.unsqueeze(transpose_34, full_int_array_0) + + # pd_op.add: (16x16x144x144xf32) <- (16x16x144x144xf32, 1x16x144x144xf32) + add_36 = paddle._C_ops.add(matmul_28, unsqueeze_7) + + # pd_op.full_int_array: (5xi64) <- () + full_int_array_65 = [4, 4, 16, 144, 144] + + # pd_op.reshape: (4x4x16x144x144xf32) <- (16x16x144x144xf32, 5xi64) + reshape_51 = paddle._C_ops.reshape(add_36, full_int_array_65) + + # pd_op.unsqueeze: (4x1x144x144xf32) <- (4x144x144xf32, 1xi64) + unsqueeze_45 = paddle._C_ops.unsqueeze(where_5, full_int_array_1) + del where_5 + + # pd_op.unsqueeze: (1x4x1x144x144xf32) <- (4x1x144x144xf32, 1xi64) + unsqueeze_8 = paddle._C_ops.unsqueeze(unsqueeze_45, full_int_array_0) + del unsqueeze_45 + + # pd_op.add: (4x4x16x144x144xf32) <- (4x4x16x144x144xf32, 1x4x1x144x144xf32) + add_37 = paddle._C_ops.add(reshape_51, unsqueeze_8) + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_66 = [16, 16, 144, 144] + + # pd_op.reshape: (16x16x144x144xf32) <- (4x4x16x144x144xf32, 4xi64) + reshape_247 = paddle._C_ops.reshape(add_37, full_int_array_66) + + # pd_op.softmax: (16x16x144x144xf32) <- (16x16x144x144xf32) + softmax_5 = paddle._C_ops.softmax(reshape_247, -1) + del reshape_247 + + # pd_op.matmul: (16x16x144x32xf32) <- (16x16x144x144xf32, 16x16x144x32xf32) + matmul_129 = paddle._C_ops.matmul(softmax_5, slice_5, False, False) + + # pd_op.transpose: (16x144x16x32xf32) <- (16x16x144x32xf32) + transpose_35 = paddle._C_ops.transpose(matmul_129, [0, 2, 1, 3]) + del matmul_129 + + # pd_op.reshape: (16x144x512xf32) <- (16x144x16x32xf32, 3xi64) + reshape_52 = paddle._C_ops.reshape(transpose_35, full_int_array_60) + + # pd_op.matmul: (16x144x512xf32) <- (16x144x512xf32, 512x512xf32) + matmul_29 = paddle._C_ops.matmul(reshape_52, parameter_230, False, False) + del parameter_230 + + # pd_op.add: (16x144x512xf32) <- (16x144x512xf32, 512xf32) + add_38 = paddle._C_ops.add(matmul_29, parameter_229) + del parameter_229 + + # pd_op.reshape: (16x12x12x512xf32) <- (16x144x512xf32, 4xi64) + reshape_53 = paddle._C_ops.reshape(add_38, full_int_array_57) + + # pd_op.reshape: (4x2x2x12x12x512xf32) <- (16x12x12x512xf32, 6xi64) + reshape_248 = paddle._C_ops.reshape(reshape_53, full_int_array_61) + + # pd_op.transpose: (4x2x12x2x12x512xf32) <- (4x2x2x12x12x512xf32) + transpose_36 = paddle._C_ops.transpose(reshape_248, [0, 1, 3, 2, 4, 5]) + del reshape_248 + + # pd_op.reshape: (4x24x24x512xf32) <- (4x2x12x2x12x512xf32, 4xi64) + reshape_54 = paddle._C_ops.reshape(transpose_36, full_int_array_62) + + # pd_op.roll: (4x24x24x512xf32) <- (4x24x24x512xf32, 2xi64) + roll_5 = paddle._C_ops.roll(reshape_54, full_int_array_5, [1, 2]) + + # pd_op.reshape: (4x576x512xf32) <- (4x24x24x512xf32, 3xi64) + reshape_55 = paddle._C_ops.reshape(roll_5, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_6 = paddle._C_ops.full( + [], + float("0.891304"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_73 = full_6 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_8 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_181 = paddle._C_ops.add(full_6, uniform_8) + del uniform_8 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_8 = paddle._C_ops.floor(add_181) + del add_181 + + # pd_op.divide: (4x576x512xf32) <- (4x576x512xf32, xf32) + divide_8 = paddle._C_ops.divide(reshape_55, full_6) + + # pd_op.multiply: (4x576x512xf32) <- (4x576x512xf32, 4x1x1xf32) + multiply_8 = paddle._C_ops.multiply(divide_8, floor_8) + + # pd_op.add: (4x576x512xf32) <- (4x576x512xf32, 4x576x512xf32) + add_39 = paddle._C_ops.add(add_34, multiply_8) + + # pd_op.layer_norm: (4x576x512xf32, 4x576xf32, 4x576xf32) <- (4x576x512xf32, 512xf32, 512xf32) + layer_norm_42, layer_norm_43, layer_norm_44 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_39, parameter_228, parameter_227, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_227, parameter_228 + + # pd_op.matmul: (4x576x2048xf32) <- (4x576x512xf32, 512x2048xf32) + matmul_30 = paddle._C_ops.matmul(layer_norm_42, parameter_226, False, False) + del parameter_226 + + # pd_op.add: (4x576x2048xf32) <- (4x576x2048xf32, 2048xf32) + add_40 = paddle._C_ops.add(matmul_30, parameter_225) + del parameter_225 + + # pd_op.gelu: (4x576x2048xf32) <- (4x576x2048xf32) + gelu_5 = paddle._C_ops.gelu(add_40, False) + + # pd_op.matmul: (4x576x512xf32) <- (4x576x2048xf32, 2048x512xf32) + matmul_31 = paddle._C_ops.matmul(gelu_5, parameter_224, False, False) + del parameter_224 + + # pd_op.add: (4x576x512xf32) <- (4x576x512xf32, 512xf32) + add_41 = paddle._C_ops.add(matmul_31, parameter_223) + del parameter_223 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_9 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_182 = paddle._C_ops.add(full_6, uniform_9) + del uniform_9 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_9 = paddle._C_ops.floor(add_182) + del add_182 + + # pd_op.divide: (4x576x512xf32) <- (4x576x512xf32, xf32) + divide_9 = paddle._C_ops.divide(add_41, full_6) + + # pd_op.multiply: (4x576x512xf32) <- (4x576x512xf32, 4x1x1xf32) + multiply_9 = paddle._C_ops.multiply(divide_9, floor_9) + + # pd_op.add: (4x576x512xf32) <- (4x576x512xf32, 4x576x512xf32) + add_42 = paddle._C_ops.add(add_39, multiply_9) + + # pd_op.layer_norm: (4x576x512xf32, 4x576xf32, 4x576xf32) <- (4x576x512xf32, 512xf32, 512xf32) + layer_norm_45, layer_norm_46, layer_norm_47 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_42, parameter_222, parameter_221, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_221, parameter_222 + + # pd_op.reshape: (4x24x24x512xf32) <- (4x576x512xf32, 4xi64) + reshape_56 = paddle._C_ops.reshape(layer_norm_45, full_int_array_55) + + # pd_op.reshape: (4x2x12x2x12x512xf32) <- (4x24x24x512xf32, 6xi64) + reshape_249 = paddle._C_ops.reshape(reshape_56, full_int_array_56) + + # pd_op.transpose: (4x2x2x12x12x512xf32) <- (4x2x12x2x12x512xf32) + transpose_37 = paddle._C_ops.transpose(reshape_249, [0, 1, 3, 2, 4, 5]) + del reshape_249 + + # pd_op.reshape: (16x12x12x512xf32) <- (4x2x2x12x12x512xf32, 4xi64) + reshape_57 = paddle._C_ops.reshape(transpose_37, full_int_array_57) + + # pd_op.reshape: (16x144x512xf32) <- (16x12x12x512xf32, 3xi64) + reshape_58 = paddle._C_ops.reshape(reshape_57, full_int_array_58) + + # pd_op.matmul: (16x144x1536xf32) <- (16x144x512xf32, 512x1536xf32) + matmul_32 = paddle._C_ops.matmul(reshape_58, parameter_220, False, False) + del parameter_220 + + # pd_op.add: (16x144x1536xf32) <- (16x144x1536xf32, 1536xf32) + add_43 = paddle._C_ops.add(matmul_32, parameter_219) + del parameter_219 + + # pd_op.reshape: (16x144x3x16x32xf32) <- (16x144x1536xf32, 5xi64) + reshape_250 = paddle._C_ops.reshape(add_43, full_int_array_59) + + # pd_op.transpose: (3x16x16x144x32xf32) <- (16x144x3x16x32xf32) + transpose_38 = paddle._C_ops.transpose(reshape_250, [2, 0, 3, 1, 4]) + del reshape_250 + + # pd_op.slice: (16x16x144x32xf32) <- (3x16x16x144x32xf32, 1xi64, 1xi64) + slice_36 = paddle._C_ops.slice( + transpose_38, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (16x16x144x32xf32) <- (3x16x16x144x32xf32, 1xi64, 1xi64) + slice_37 = paddle._C_ops.slice( + transpose_38, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (16x16x144x32xf32) <- (3x16x16x144x32xf32, 1xi64, 1xi64) + slice_6 = paddle._C_ops.slice( + transpose_38, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (16x16x144x32xf32) <- (16x16x144x32xf32, 1xf32) + scale_6 = paddle._C_ops.scale(slice_36, full_0, float("0"), True) + del slice_36 + + # pd_op.transpose: (16x16x32x144xf32) <- (16x16x144x32xf32) + transpose_39 = paddle._C_ops.transpose(slice_37, [0, 1, 3, 2]) + del slice_37 + + # pd_op.matmul: (16x16x144x144xf32) <- (16x16x144x32xf32, 16x16x32x144xf32) + matmul_33 = paddle._C_ops.matmul(scale_6, transpose_39, False, False) + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_59 = paddle._C_ops.reshape(data_31, full_int_array_15) + del data_31 + + # pd_op.index_select: (20736x16xf32) <- (529x16xf32, 20736xi64) + index_select_6 = paddle._C_ops.index_select(data_32, reshape_59, 0) + del data_32 + + # pd_op.reshape: (144x144x16xf32) <- (20736x16xf32, 3xi64) + reshape_251 = paddle._C_ops.reshape(index_select_6, full_int_array_16) + + # pd_op.transpose: (16x144x144xf32) <- (144x144x16xf32) + transpose_40 = paddle._C_ops.transpose(reshape_251, [2, 0, 1]) + del reshape_251 + + # pd_op.unsqueeze: (1x16x144x144xf32) <- (16x144x144xf32, 1xi64) + unsqueeze_9 = paddle._C_ops.unsqueeze(transpose_40, full_int_array_0) + + # pd_op.add: (16x16x144x144xf32) <- (16x16x144x144xf32, 1x16x144x144xf32) + add_183 = paddle._C_ops.add(matmul_33, unsqueeze_9) + + # pd_op.softmax: (16x16x144x144xf32) <- (16x16x144x144xf32) + softmax_6 = paddle._C_ops.softmax(add_183, -1) + del add_183 + + # pd_op.matmul: (16x16x144x32xf32) <- (16x16x144x144xf32, 16x16x144x32xf32) + matmul_130 = paddle._C_ops.matmul(softmax_6, slice_6, False, False) + + # pd_op.transpose: (16x144x16x32xf32) <- (16x16x144x32xf32) + transpose_41 = paddle._C_ops.transpose(matmul_130, [0, 2, 1, 3]) + del matmul_130 + + # pd_op.reshape: (16x144x512xf32) <- (16x144x16x32xf32, 3xi64) + reshape_60 = paddle._C_ops.reshape(transpose_41, full_int_array_60) + + # pd_op.matmul: (16x144x512xf32) <- (16x144x512xf32, 512x512xf32) + matmul_34 = paddle._C_ops.matmul(reshape_60, parameter_218, False, False) + del parameter_218 + + # pd_op.add: (16x144x512xf32) <- (16x144x512xf32, 512xf32) + add_44 = paddle._C_ops.add(matmul_34, parameter_217) + del parameter_217 + + # pd_op.reshape: (16x12x12x512xf32) <- (16x144x512xf32, 4xi64) + reshape_61 = paddle._C_ops.reshape(add_44, full_int_array_57) + + # pd_op.reshape: (4x2x2x12x12x512xf32) <- (16x12x12x512xf32, 6xi64) + reshape_252 = paddle._C_ops.reshape(reshape_61, full_int_array_61) + + # pd_op.transpose: (4x2x12x2x12x512xf32) <- (4x2x2x12x12x512xf32) + transpose_42 = paddle._C_ops.transpose(reshape_252, [0, 1, 3, 2, 4, 5]) + del reshape_252 + + # pd_op.reshape: (4x24x24x512xf32) <- (4x2x12x2x12x512xf32, 4xi64) + reshape_62 = paddle._C_ops.reshape(transpose_42, full_int_array_62) + + # pd_op.reshape: (4x576x512xf32) <- (4x24x24x512xf32, 3xi64) + reshape_63 = paddle._C_ops.reshape(reshape_62, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_7 = paddle._C_ops.full( + [], + float("0.869565"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_82 = full_7 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_10 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_184 = paddle._C_ops.add(full_7, uniform_10) + del uniform_10 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_10 = paddle._C_ops.floor(add_184) + del add_184 + + # pd_op.divide: (4x576x512xf32) <- (4x576x512xf32, xf32) + divide_10 = paddle._C_ops.divide(reshape_63, full_7) + + # pd_op.multiply: (4x576x512xf32) <- (4x576x512xf32, 4x1x1xf32) + multiply_10 = paddle._C_ops.multiply(divide_10, floor_10) + + # pd_op.add: (4x576x512xf32) <- (4x576x512xf32, 4x576x512xf32) + add_45 = paddle._C_ops.add(add_42, multiply_10) + + # pd_op.layer_norm: (4x576x512xf32, 4x576xf32, 4x576xf32) <- (4x576x512xf32, 512xf32, 512xf32) + layer_norm_48, layer_norm_49, layer_norm_50 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_45, parameter_216, parameter_215, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_215, parameter_216 + + # pd_op.matmul: (4x576x2048xf32) <- (4x576x512xf32, 512x2048xf32) + matmul_35 = paddle._C_ops.matmul(layer_norm_48, parameter_214, False, False) + del parameter_214 + + # pd_op.add: (4x576x2048xf32) <- (4x576x2048xf32, 2048xf32) + add_46 = paddle._C_ops.add(matmul_35, parameter_213) + del parameter_213 + + # pd_op.gelu: (4x576x2048xf32) <- (4x576x2048xf32) + gelu_6 = paddle._C_ops.gelu(add_46, False) + + # pd_op.matmul: (4x576x512xf32) <- (4x576x2048xf32, 2048x512xf32) + matmul_36 = paddle._C_ops.matmul(gelu_6, parameter_212, False, False) + del parameter_212 + + # pd_op.add: (4x576x512xf32) <- (4x576x512xf32, 512xf32) + add_47 = paddle._C_ops.add(matmul_36, parameter_211) + del parameter_211 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_11 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_185 = paddle._C_ops.add(full_7, uniform_11) + del uniform_11 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_11 = paddle._C_ops.floor(add_185) + del add_185 + + # pd_op.divide: (4x576x512xf32) <- (4x576x512xf32, xf32) + divide_11 = paddle._C_ops.divide(add_47, full_7) + + # pd_op.multiply: (4x576x512xf32) <- (4x576x512xf32, 4x1x1xf32) + multiply_11 = paddle._C_ops.multiply(divide_11, floor_11) + + # pd_op.add: (4x576x512xf32) <- (4x576x512xf32, 4x576x512xf32) + add_48 = paddle._C_ops.add(add_45, multiply_11) + + # pd_op.layer_norm: (4x576x512xf32, 4x576xf32, 4x576xf32) <- (4x576x512xf32, 512xf32, 512xf32) + layer_norm_51, layer_norm_52, layer_norm_53 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_48, parameter_210, parameter_209, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_209, parameter_210 + + # pd_op.reshape: (4x24x24x512xf32) <- (4x576x512xf32, 4xi64) + reshape_64 = paddle._C_ops.reshape(layer_norm_51, full_int_array_55) + + # pd_op.roll: (4x24x24x512xf32) <- (4x24x24x512xf32, 2xi64) + roll_6 = paddle._C_ops.roll(reshape_64, full_int_array_4, [1, 2]) + + # pd_op.reshape: (4x2x12x2x12x512xf32) <- (4x24x24x512xf32, 6xi64) + reshape_253 = paddle._C_ops.reshape(roll_6, full_int_array_56) + + # pd_op.transpose: (4x2x2x12x12x512xf32) <- (4x2x12x2x12x512xf32) + transpose_43 = paddle._C_ops.transpose(reshape_253, [0, 1, 3, 2, 4, 5]) + del reshape_253 + + # pd_op.reshape: (16x12x12x512xf32) <- (4x2x2x12x12x512xf32, 4xi64) + reshape_65 = paddle._C_ops.reshape(transpose_43, full_int_array_57) + + # pd_op.reshape: (16x144x512xf32) <- (16x12x12x512xf32, 3xi64) + reshape_66 = paddle._C_ops.reshape(reshape_65, full_int_array_58) + + # pd_op.full: (1x24x24x1xf32) <- () + full_37 = paddle._C_ops.full( + [1, 24, 24, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__36 = paddle._C_ops.set_value_( + full_37, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_37 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__37 = paddle._C_ops.set_value_( + set_value__36, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__36 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__38 = paddle._C_ops.set_value_( + set_value__37, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__37 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__39 = paddle._C_ops.set_value_( + set_value__38, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__38 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__40 = paddle._C_ops.set_value_( + set_value__39, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__39 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__41 = paddle._C_ops.set_value_( + set_value__40, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__40 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__42 = paddle._C_ops.set_value_( + set_value__41, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__41 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__43 = paddle._C_ops.set_value_( + set_value__42, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__42 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__3 = paddle._C_ops.set_value_( + set_value__43, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__43 + + # pd_op.reshape: (1x2x12x2x12x1xf32) <- (1x24x24x1xf32, 6xi64) + reshape_254 = paddle._C_ops.reshape(set_value__3, full_int_array_64) + + # pd_op.transpose: (1x2x2x12x12x1xf32) <- (1x2x12x2x12x1xf32) + transpose_149 = paddle._C_ops.transpose(reshape_254, [0, 1, 3, 2, 4, 5]) + del reshape_254 + + # pd_op.reshape: (4x12x12x1xf32) <- (1x2x2x12x12x1xf32, 4xi64) + reshape_255 = paddle._C_ops.reshape(transpose_149, full_int_array_36) + del transpose_149 + + # pd_op.reshape: (4x144xf32) <- (4x12x12x1xf32, 2xi64) + reshape_256 = paddle._C_ops.reshape(reshape_255, full_int_array_37) + del reshape_255 + + # pd_op.unsqueeze: (4x1x144xf32) <- (4x144xf32, 1xi64) + unsqueeze_46 = paddle._C_ops.unsqueeze(reshape_256, full_int_array_1) + + # pd_op.unsqueeze: (4x144x1xf32) <- (4x144xf32, 1xi64) + unsqueeze_47 = paddle._C_ops.unsqueeze(reshape_256, full_int_array_2) + del reshape_256 + + # pd_op.subtract: (4x144x144xf32) <- (4x1x144xf32, 4x144x1xf32) + subtract_3 = paddle._C_ops.subtract(unsqueeze_46, unsqueeze_47) + del unsqueeze_46, unsqueeze_47 + + # pd_op.not_equal: (4x144x144xb) <- (4x144x144xf32, xf32) + not_equal_3 = paddle._C_ops.not_equal(subtract_3, full_26) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_6 = paddle._C_ops.where(not_equal_3, full_35, subtract_3) + del not_equal_3, subtract_3 + + # pd_op.equal: (4x144x144xb) <- (4x144x144xf32, xf32) + equal_3 = paddle._C_ops.equal(where_6, full_26) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_7 = paddle._C_ops.where(equal_3, full_36, where_6) + del equal_3, where_6 + + # pd_op.matmul: (16x144x1536xf32) <- (16x144x512xf32, 512x1536xf32) + matmul_37 = paddle._C_ops.matmul(reshape_66, parameter_208, False, False) + del parameter_208 + + # pd_op.add: (16x144x1536xf32) <- (16x144x1536xf32, 1536xf32) + add_49 = paddle._C_ops.add(matmul_37, parameter_207) + del parameter_207 + + # pd_op.reshape: (16x144x3x16x32xf32) <- (16x144x1536xf32, 5xi64) + reshape_257 = paddle._C_ops.reshape(add_49, full_int_array_59) + + # pd_op.transpose: (3x16x16x144x32xf32) <- (16x144x3x16x32xf32) + transpose_44 = paddle._C_ops.transpose(reshape_257, [2, 0, 3, 1, 4]) + del reshape_257 + + # pd_op.slice: (16x16x144x32xf32) <- (3x16x16x144x32xf32, 1xi64, 1xi64) + slice_38 = paddle._C_ops.slice( + transpose_44, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (16x16x144x32xf32) <- (3x16x16x144x32xf32, 1xi64, 1xi64) + slice_39 = paddle._C_ops.slice( + transpose_44, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (16x16x144x32xf32) <- (3x16x16x144x32xf32, 1xi64, 1xi64) + slice_7 = paddle._C_ops.slice( + transpose_44, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (16x16x144x32xf32) <- (16x16x144x32xf32, 1xf32) + scale_7 = paddle._C_ops.scale(slice_38, full_0, float("0"), True) + del slice_38 + + # pd_op.transpose: (16x16x32x144xf32) <- (16x16x144x32xf32) + transpose_45 = paddle._C_ops.transpose(slice_39, [0, 1, 3, 2]) + del slice_39 + + # pd_op.matmul: (16x16x144x144xf32) <- (16x16x144x32xf32, 16x16x32x144xf32) + matmul_38 = paddle._C_ops.matmul(scale_7, transpose_45, False, False) + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_67 = paddle._C_ops.reshape(data_33, full_int_array_15) + del data_33 + + # pd_op.index_select: (20736x16xf32) <- (529x16xf32, 20736xi64) + index_select_7 = paddle._C_ops.index_select(data_34, reshape_67, 0) + del data_34 + + # pd_op.reshape: (144x144x16xf32) <- (20736x16xf32, 3xi64) + reshape_258 = paddle._C_ops.reshape(index_select_7, full_int_array_16) + + # pd_op.transpose: (16x144x144xf32) <- (144x144x16xf32) + transpose_46 = paddle._C_ops.transpose(reshape_258, [2, 0, 1]) + del reshape_258 + + # pd_op.unsqueeze: (1x16x144x144xf32) <- (16x144x144xf32, 1xi64) + unsqueeze_10 = paddle._C_ops.unsqueeze(transpose_46, full_int_array_0) + + # pd_op.add: (16x16x144x144xf32) <- (16x16x144x144xf32, 1x16x144x144xf32) + add_50 = paddle._C_ops.add(matmul_38, unsqueeze_10) + + # pd_op.reshape: (4x4x16x144x144xf32) <- (16x16x144x144xf32, 5xi64) + reshape_68 = paddle._C_ops.reshape(add_50, full_int_array_65) + + # pd_op.unsqueeze: (4x1x144x144xf32) <- (4x144x144xf32, 1xi64) + unsqueeze_48 = paddle._C_ops.unsqueeze(where_7, full_int_array_1) + del where_7 + + # pd_op.unsqueeze: (1x4x1x144x144xf32) <- (4x1x144x144xf32, 1xi64) + unsqueeze_11 = paddle._C_ops.unsqueeze(unsqueeze_48, full_int_array_0) + del unsqueeze_48 + + # pd_op.add: (4x4x16x144x144xf32) <- (4x4x16x144x144xf32, 1x4x1x144x144xf32) + add_51 = paddle._C_ops.add(reshape_68, unsqueeze_11) + + # pd_op.reshape: (16x16x144x144xf32) <- (4x4x16x144x144xf32, 4xi64) + reshape_259 = paddle._C_ops.reshape(add_51, full_int_array_66) + + # pd_op.softmax: (16x16x144x144xf32) <- (16x16x144x144xf32) + softmax_7 = paddle._C_ops.softmax(reshape_259, -1) + del reshape_259 + + # pd_op.matmul: (16x16x144x32xf32) <- (16x16x144x144xf32, 16x16x144x32xf32) + matmul_131 = paddle._C_ops.matmul(softmax_7, slice_7, False, False) + + # pd_op.transpose: (16x144x16x32xf32) <- (16x16x144x32xf32) + transpose_47 = paddle._C_ops.transpose(matmul_131, [0, 2, 1, 3]) + del matmul_131 + + # pd_op.reshape: (16x144x512xf32) <- (16x144x16x32xf32, 3xi64) + reshape_69 = paddle._C_ops.reshape(transpose_47, full_int_array_60) + + # pd_op.matmul: (16x144x512xf32) <- (16x144x512xf32, 512x512xf32) + matmul_39 = paddle._C_ops.matmul(reshape_69, parameter_206, False, False) + del parameter_206 + + # pd_op.add: (16x144x512xf32) <- (16x144x512xf32, 512xf32) + add_52 = paddle._C_ops.add(matmul_39, parameter_205) + del parameter_205 + + # pd_op.reshape: (16x12x12x512xf32) <- (16x144x512xf32, 4xi64) + reshape_70 = paddle._C_ops.reshape(add_52, full_int_array_57) + + # pd_op.reshape: (4x2x2x12x12x512xf32) <- (16x12x12x512xf32, 6xi64) + reshape_260 = paddle._C_ops.reshape(reshape_70, full_int_array_61) + + # pd_op.transpose: (4x2x12x2x12x512xf32) <- (4x2x2x12x12x512xf32) + transpose_48 = paddle._C_ops.transpose(reshape_260, [0, 1, 3, 2, 4, 5]) + del reshape_260 + + # pd_op.reshape: (4x24x24x512xf32) <- (4x2x12x2x12x512xf32, 4xi64) + reshape_71 = paddle._C_ops.reshape(transpose_48, full_int_array_62) + + # pd_op.roll: (4x24x24x512xf32) <- (4x24x24x512xf32, 2xi64) + roll_7 = paddle._C_ops.roll(reshape_71, full_int_array_5, [1, 2]) + + # pd_op.reshape: (4x576x512xf32) <- (4x24x24x512xf32, 3xi64) + reshape_72 = paddle._C_ops.reshape(roll_7, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_8 = paddle._C_ops.full( + [], + float("0.847826"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_93 = full_8 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_12 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_186 = paddle._C_ops.add(full_8, uniform_12) + del uniform_12 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_12 = paddle._C_ops.floor(add_186) + del add_186 + + # pd_op.divide: (4x576x512xf32) <- (4x576x512xf32, xf32) + divide_12 = paddle._C_ops.divide(reshape_72, full_8) + + # pd_op.multiply: (4x576x512xf32) <- (4x576x512xf32, 4x1x1xf32) + multiply_12 = paddle._C_ops.multiply(divide_12, floor_12) + + # pd_op.add: (4x576x512xf32) <- (4x576x512xf32, 4x576x512xf32) + add_53 = paddle._C_ops.add(add_48, multiply_12) + + # pd_op.layer_norm: (4x576x512xf32, 4x576xf32, 4x576xf32) <- (4x576x512xf32, 512xf32, 512xf32) + layer_norm_54, layer_norm_55, layer_norm_56 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_53, parameter_204, parameter_203, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_203, parameter_204 + + # pd_op.matmul: (4x576x2048xf32) <- (4x576x512xf32, 512x2048xf32) + matmul_40 = paddle._C_ops.matmul(layer_norm_54, parameter_202, False, False) + del parameter_202 + + # pd_op.add: (4x576x2048xf32) <- (4x576x2048xf32, 2048xf32) + add_54 = paddle._C_ops.add(matmul_40, parameter_201) + del parameter_201 + + # pd_op.gelu: (4x576x2048xf32) <- (4x576x2048xf32) + gelu_7 = paddle._C_ops.gelu(add_54, False) + + # pd_op.matmul: (4x576x512xf32) <- (4x576x2048xf32, 2048x512xf32) + matmul_41 = paddle._C_ops.matmul(gelu_7, parameter_200, False, False) + del parameter_200 + + # pd_op.add: (4x576x512xf32) <- (4x576x512xf32, 512xf32) + add_55 = paddle._C_ops.add(matmul_41, parameter_199) + del parameter_199 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_13 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_187 = paddle._C_ops.add(full_8, uniform_13) + del uniform_13 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_13 = paddle._C_ops.floor(add_187) + del add_187 + + # pd_op.divide: (4x576x512xf32) <- (4x576x512xf32, xf32) + divide_13 = paddle._C_ops.divide(add_55, full_8) + + # pd_op.multiply: (4x576x512xf32) <- (4x576x512xf32, 4x1x1xf32) + multiply_13 = paddle._C_ops.multiply(divide_13, floor_13) + + # pd_op.add: (4x576x512xf32) <- (4x576x512xf32, 4x576x512xf32) + add_56 = paddle._C_ops.add(add_53, multiply_13) + + # pd_op.layer_norm: (4x576x512xf32, 4x576xf32, 4x576xf32) <- (4x576x512xf32, 512xf32, 512xf32) + layer_norm_57, layer_norm_58, layer_norm_59 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_56, parameter_198, parameter_197, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_197, parameter_198 + + # pd_op.reshape: (4x24x24x512xf32) <- (4x576x512xf32, 4xi64) + reshape_73 = paddle._C_ops.reshape(layer_norm_57, full_int_array_55) + + # pd_op.reshape: (4x2x12x2x12x512xf32) <- (4x24x24x512xf32, 6xi64) + reshape_261 = paddle._C_ops.reshape(reshape_73, full_int_array_56) + + # pd_op.transpose: (4x2x2x12x12x512xf32) <- (4x2x12x2x12x512xf32) + transpose_49 = paddle._C_ops.transpose(reshape_261, [0, 1, 3, 2, 4, 5]) + del reshape_261 + + # pd_op.reshape: (16x12x12x512xf32) <- (4x2x2x12x12x512xf32, 4xi64) + reshape_74 = paddle._C_ops.reshape(transpose_49, full_int_array_57) + + # pd_op.reshape: (16x144x512xf32) <- (16x12x12x512xf32, 3xi64) + reshape_75 = paddle._C_ops.reshape(reshape_74, full_int_array_58) + + # pd_op.matmul: (16x144x1536xf32) <- (16x144x512xf32, 512x1536xf32) + matmul_42 = paddle._C_ops.matmul(reshape_75, parameter_196, False, False) + del parameter_196 + + # pd_op.add: (16x144x1536xf32) <- (16x144x1536xf32, 1536xf32) + add_57 = paddle._C_ops.add(matmul_42, parameter_195) + del parameter_195 + + # pd_op.reshape: (16x144x3x16x32xf32) <- (16x144x1536xf32, 5xi64) + reshape_262 = paddle._C_ops.reshape(add_57, full_int_array_59) + + # pd_op.transpose: (3x16x16x144x32xf32) <- (16x144x3x16x32xf32) + transpose_50 = paddle._C_ops.transpose(reshape_262, [2, 0, 3, 1, 4]) + del reshape_262 + + # pd_op.slice: (16x16x144x32xf32) <- (3x16x16x144x32xf32, 1xi64, 1xi64) + slice_40 = paddle._C_ops.slice( + transpose_50, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (16x16x144x32xf32) <- (3x16x16x144x32xf32, 1xi64, 1xi64) + slice_41 = paddle._C_ops.slice( + transpose_50, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (16x16x144x32xf32) <- (3x16x16x144x32xf32, 1xi64, 1xi64) + slice_8 = paddle._C_ops.slice( + transpose_50, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (16x16x144x32xf32) <- (16x16x144x32xf32, 1xf32) + scale_8 = paddle._C_ops.scale(slice_40, full_0, float("0"), True) + del slice_40 + + # pd_op.transpose: (16x16x32x144xf32) <- (16x16x144x32xf32) + transpose_51 = paddle._C_ops.transpose(slice_41, [0, 1, 3, 2]) + del slice_41 + + # pd_op.matmul: (16x16x144x144xf32) <- (16x16x144x32xf32, 16x16x32x144xf32) + matmul_43 = paddle._C_ops.matmul(scale_8, transpose_51, False, False) + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_76 = paddle._C_ops.reshape(data_35, full_int_array_15) + del data_35 + + # pd_op.index_select: (20736x16xf32) <- (529x16xf32, 20736xi64) + index_select_8 = paddle._C_ops.index_select(data_36, reshape_76, 0) + del data_36 + + # pd_op.reshape: (144x144x16xf32) <- (20736x16xf32, 3xi64) + reshape_263 = paddle._C_ops.reshape(index_select_8, full_int_array_16) + + # pd_op.transpose: (16x144x144xf32) <- (144x144x16xf32) + transpose_52 = paddle._C_ops.transpose(reshape_263, [2, 0, 1]) + del reshape_263 + + # pd_op.unsqueeze: (1x16x144x144xf32) <- (16x144x144xf32, 1xi64) + unsqueeze_12 = paddle._C_ops.unsqueeze(transpose_52, full_int_array_0) + + # pd_op.add: (16x16x144x144xf32) <- (16x16x144x144xf32, 1x16x144x144xf32) + add_188 = paddle._C_ops.add(matmul_43, unsqueeze_12) + + # pd_op.softmax: (16x16x144x144xf32) <- (16x16x144x144xf32) + softmax_8 = paddle._C_ops.softmax(add_188, -1) + del add_188 + + # pd_op.matmul: (16x16x144x32xf32) <- (16x16x144x144xf32, 16x16x144x32xf32) + matmul_132 = paddle._C_ops.matmul(softmax_8, slice_8, False, False) + + # pd_op.transpose: (16x144x16x32xf32) <- (16x16x144x32xf32) + transpose_53 = paddle._C_ops.transpose(matmul_132, [0, 2, 1, 3]) + del matmul_132 + + # pd_op.reshape: (16x144x512xf32) <- (16x144x16x32xf32, 3xi64) + reshape_77 = paddle._C_ops.reshape(transpose_53, full_int_array_60) + + # pd_op.matmul: (16x144x512xf32) <- (16x144x512xf32, 512x512xf32) + matmul_44 = paddle._C_ops.matmul(reshape_77, parameter_194, False, False) + del parameter_194 + + # pd_op.add: (16x144x512xf32) <- (16x144x512xf32, 512xf32) + add_58 = paddle._C_ops.add(matmul_44, parameter_193) + del parameter_193 + + # pd_op.reshape: (16x12x12x512xf32) <- (16x144x512xf32, 4xi64) + reshape_78 = paddle._C_ops.reshape(add_58, full_int_array_57) + + # pd_op.reshape: (4x2x2x12x12x512xf32) <- (16x12x12x512xf32, 6xi64) + reshape_264 = paddle._C_ops.reshape(reshape_78, full_int_array_61) + + # pd_op.transpose: (4x2x12x2x12x512xf32) <- (4x2x2x12x12x512xf32) + transpose_54 = paddle._C_ops.transpose(reshape_264, [0, 1, 3, 2, 4, 5]) + del reshape_264 + + # pd_op.reshape: (4x24x24x512xf32) <- (4x2x12x2x12x512xf32, 4xi64) + reshape_79 = paddle._C_ops.reshape(transpose_54, full_int_array_62) + + # pd_op.reshape: (4x576x512xf32) <- (4x24x24x512xf32, 3xi64) + reshape_80 = paddle._C_ops.reshape(reshape_79, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_9 = paddle._C_ops.full( + [], + float("0.826087"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_102 = full_9 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_14 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_189 = paddle._C_ops.add(full_9, uniform_14) + del uniform_14 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_14 = paddle._C_ops.floor(add_189) + del add_189 + + # pd_op.divide: (4x576x512xf32) <- (4x576x512xf32, xf32) + divide_14 = paddle._C_ops.divide(reshape_80, full_9) + + # pd_op.multiply: (4x576x512xf32) <- (4x576x512xf32, 4x1x1xf32) + multiply_14 = paddle._C_ops.multiply(divide_14, floor_14) + + # pd_op.add: (4x576x512xf32) <- (4x576x512xf32, 4x576x512xf32) + add_59 = paddle._C_ops.add(add_56, multiply_14) + + # pd_op.layer_norm: (4x576x512xf32, 4x576xf32, 4x576xf32) <- (4x576x512xf32, 512xf32, 512xf32) + layer_norm_60, layer_norm_61, layer_norm_62 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_59, parameter_192, parameter_191, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_191, parameter_192 + + # pd_op.matmul: (4x576x2048xf32) <- (4x576x512xf32, 512x2048xf32) + matmul_45 = paddle._C_ops.matmul(layer_norm_60, parameter_190, False, False) + del parameter_190 + + # pd_op.add: (4x576x2048xf32) <- (4x576x2048xf32, 2048xf32) + add_60 = paddle._C_ops.add(matmul_45, parameter_189) + del parameter_189 + + # pd_op.gelu: (4x576x2048xf32) <- (4x576x2048xf32) + gelu_8 = paddle._C_ops.gelu(add_60, False) + + # pd_op.matmul: (4x576x512xf32) <- (4x576x2048xf32, 2048x512xf32) + matmul_46 = paddle._C_ops.matmul(gelu_8, parameter_188, False, False) + del parameter_188 + + # pd_op.add: (4x576x512xf32) <- (4x576x512xf32, 512xf32) + add_61 = paddle._C_ops.add(matmul_46, parameter_187) + del parameter_187 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_15 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_190 = paddle._C_ops.add(full_9, uniform_15) + del uniform_15 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_15 = paddle._C_ops.floor(add_190) + del add_190 + + # pd_op.divide: (4x576x512xf32) <- (4x576x512xf32, xf32) + divide_15 = paddle._C_ops.divide(add_61, full_9) + + # pd_op.multiply: (4x576x512xf32) <- (4x576x512xf32, 4x1x1xf32) + multiply_15 = paddle._C_ops.multiply(divide_15, floor_15) + + # pd_op.add: (4x576x512xf32) <- (4x576x512xf32, 4x576x512xf32) + add_62 = paddle._C_ops.add(add_59, multiply_15) + + # pd_op.layer_norm: (4x576x512xf32, 4x576xf32, 4x576xf32) <- (4x576x512xf32, 512xf32, 512xf32) + layer_norm_63, layer_norm_64, layer_norm_65 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_62, parameter_186, parameter_185, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_185, parameter_186 + + # pd_op.reshape: (4x24x24x512xf32) <- (4x576x512xf32, 4xi64) + reshape_81 = paddle._C_ops.reshape(layer_norm_63, full_int_array_55) + + # pd_op.roll: (4x24x24x512xf32) <- (4x24x24x512xf32, 2xi64) + roll_8 = paddle._C_ops.roll(reshape_81, full_int_array_4, [1, 2]) + + # pd_op.reshape: (4x2x12x2x12x512xf32) <- (4x24x24x512xf32, 6xi64) + reshape_265 = paddle._C_ops.reshape(roll_8, full_int_array_56) + + # pd_op.transpose: (4x2x2x12x12x512xf32) <- (4x2x12x2x12x512xf32) + transpose_55 = paddle._C_ops.transpose(reshape_265, [0, 1, 3, 2, 4, 5]) + del reshape_265 + + # pd_op.reshape: (16x12x12x512xf32) <- (4x2x2x12x12x512xf32, 4xi64) + reshape_82 = paddle._C_ops.reshape(transpose_55, full_int_array_57) + + # pd_op.reshape: (16x144x512xf32) <- (16x12x12x512xf32, 3xi64) + reshape_83 = paddle._C_ops.reshape(reshape_82, full_int_array_58) + + # pd_op.full: (1x24x24x1xf32) <- () + full_38 = paddle._C_ops.full( + [1, 24, 24, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__44 = paddle._C_ops.set_value_( + full_38, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_38 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__45 = paddle._C_ops.set_value_( + set_value__44, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__44 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__46 = paddle._C_ops.set_value_( + set_value__45, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__45 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__47 = paddle._C_ops.set_value_( + set_value__46, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__46 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__48 = paddle._C_ops.set_value_( + set_value__47, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__47 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__49 = paddle._C_ops.set_value_( + set_value__48, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__48 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__50 = paddle._C_ops.set_value_( + set_value__49, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__49 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__51 = paddle._C_ops.set_value_( + set_value__50, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__50 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__4 = paddle._C_ops.set_value_( + set_value__51, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__51 + + # pd_op.reshape: (1x2x12x2x12x1xf32) <- (1x24x24x1xf32, 6xi64) + reshape_266 = paddle._C_ops.reshape(set_value__4, full_int_array_64) + + # pd_op.transpose: (1x2x2x12x12x1xf32) <- (1x2x12x2x12x1xf32) + transpose_150 = paddle._C_ops.transpose(reshape_266, [0, 1, 3, 2, 4, 5]) + del reshape_266 + + # pd_op.reshape: (4x12x12x1xf32) <- (1x2x2x12x12x1xf32, 4xi64) + reshape_267 = paddle._C_ops.reshape(transpose_150, full_int_array_36) + del transpose_150 + + # pd_op.reshape: (4x144xf32) <- (4x12x12x1xf32, 2xi64) + reshape_268 = paddle._C_ops.reshape(reshape_267, full_int_array_37) + del reshape_267 + + # pd_op.unsqueeze: (4x1x144xf32) <- (4x144xf32, 1xi64) + unsqueeze_49 = paddle._C_ops.unsqueeze(reshape_268, full_int_array_1) + + # pd_op.unsqueeze: (4x144x1xf32) <- (4x144xf32, 1xi64) + unsqueeze_50 = paddle._C_ops.unsqueeze(reshape_268, full_int_array_2) + del reshape_268 + + # pd_op.subtract: (4x144x144xf32) <- (4x1x144xf32, 4x144x1xf32) + subtract_4 = paddle._C_ops.subtract(unsqueeze_49, unsqueeze_50) + del unsqueeze_49, unsqueeze_50 + + # pd_op.not_equal: (4x144x144xb) <- (4x144x144xf32, xf32) + not_equal_4 = paddle._C_ops.not_equal(subtract_4, full_26) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_8 = paddle._C_ops.where(not_equal_4, full_35, subtract_4) + del not_equal_4, subtract_4 + + # pd_op.equal: (4x144x144xb) <- (4x144x144xf32, xf32) + equal_4 = paddle._C_ops.equal(where_8, full_26) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_9 = paddle._C_ops.where(equal_4, full_36, where_8) + del equal_4, where_8 + + # pd_op.matmul: (16x144x1536xf32) <- (16x144x512xf32, 512x1536xf32) + matmul_47 = paddle._C_ops.matmul(reshape_83, parameter_184, False, False) + del parameter_184 + + # pd_op.add: (16x144x1536xf32) <- (16x144x1536xf32, 1536xf32) + add_63 = paddle._C_ops.add(matmul_47, parameter_183) + del parameter_183 + + # pd_op.reshape: (16x144x3x16x32xf32) <- (16x144x1536xf32, 5xi64) + reshape_269 = paddle._C_ops.reshape(add_63, full_int_array_59) + + # pd_op.transpose: (3x16x16x144x32xf32) <- (16x144x3x16x32xf32) + transpose_56 = paddle._C_ops.transpose(reshape_269, [2, 0, 3, 1, 4]) + del reshape_269 + + # pd_op.slice: (16x16x144x32xf32) <- (3x16x16x144x32xf32, 1xi64, 1xi64) + slice_42 = paddle._C_ops.slice( + transpose_56, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (16x16x144x32xf32) <- (3x16x16x144x32xf32, 1xi64, 1xi64) + slice_43 = paddle._C_ops.slice( + transpose_56, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (16x16x144x32xf32) <- (3x16x16x144x32xf32, 1xi64, 1xi64) + slice_9 = paddle._C_ops.slice( + transpose_56, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (16x16x144x32xf32) <- (16x16x144x32xf32, 1xf32) + scale_9 = paddle._C_ops.scale(slice_42, full_0, float("0"), True) + del slice_42 + + # pd_op.transpose: (16x16x32x144xf32) <- (16x16x144x32xf32) + transpose_57 = paddle._C_ops.transpose(slice_43, [0, 1, 3, 2]) + del slice_43 + + # pd_op.matmul: (16x16x144x144xf32) <- (16x16x144x32xf32, 16x16x32x144xf32) + matmul_48 = paddle._C_ops.matmul(scale_9, transpose_57, False, False) + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_84 = paddle._C_ops.reshape(data_37, full_int_array_15) + del data_37 + + # pd_op.index_select: (20736x16xf32) <- (529x16xf32, 20736xi64) + index_select_9 = paddle._C_ops.index_select(data_38, reshape_84, 0) + del data_38 + + # pd_op.reshape: (144x144x16xf32) <- (20736x16xf32, 3xi64) + reshape_270 = paddle._C_ops.reshape(index_select_9, full_int_array_16) + + # pd_op.transpose: (16x144x144xf32) <- (144x144x16xf32) + transpose_58 = paddle._C_ops.transpose(reshape_270, [2, 0, 1]) + del reshape_270 + + # pd_op.unsqueeze: (1x16x144x144xf32) <- (16x144x144xf32, 1xi64) + unsqueeze_13 = paddle._C_ops.unsqueeze(transpose_58, full_int_array_0) + + # pd_op.add: (16x16x144x144xf32) <- (16x16x144x144xf32, 1x16x144x144xf32) + add_64 = paddle._C_ops.add(matmul_48, unsqueeze_13) + + # pd_op.reshape: (4x4x16x144x144xf32) <- (16x16x144x144xf32, 5xi64) + reshape_85 = paddle._C_ops.reshape(add_64, full_int_array_65) + + # pd_op.unsqueeze: (4x1x144x144xf32) <- (4x144x144xf32, 1xi64) + unsqueeze_51 = paddle._C_ops.unsqueeze(where_9, full_int_array_1) + del where_9 + + # pd_op.unsqueeze: (1x4x1x144x144xf32) <- (4x1x144x144xf32, 1xi64) + unsqueeze_14 = paddle._C_ops.unsqueeze(unsqueeze_51, full_int_array_0) + del unsqueeze_51 + + # pd_op.add: (4x4x16x144x144xf32) <- (4x4x16x144x144xf32, 1x4x1x144x144xf32) + add_65 = paddle._C_ops.add(reshape_85, unsqueeze_14) + + # pd_op.reshape: (16x16x144x144xf32) <- (4x4x16x144x144xf32, 4xi64) + reshape_271 = paddle._C_ops.reshape(add_65, full_int_array_66) + + # pd_op.softmax: (16x16x144x144xf32) <- (16x16x144x144xf32) + softmax_9 = paddle._C_ops.softmax(reshape_271, -1) + del reshape_271 + + # pd_op.matmul: (16x16x144x32xf32) <- (16x16x144x144xf32, 16x16x144x32xf32) + matmul_133 = paddle._C_ops.matmul(softmax_9, slice_9, False, False) + + # pd_op.transpose: (16x144x16x32xf32) <- (16x16x144x32xf32) + transpose_59 = paddle._C_ops.transpose(matmul_133, [0, 2, 1, 3]) + del matmul_133 + + # pd_op.reshape: (16x144x512xf32) <- (16x144x16x32xf32, 3xi64) + reshape_86 = paddle._C_ops.reshape(transpose_59, full_int_array_60) + + # pd_op.matmul: (16x144x512xf32) <- (16x144x512xf32, 512x512xf32) + matmul_49 = paddle._C_ops.matmul(reshape_86, parameter_182, False, False) + del parameter_182 + + # pd_op.add: (16x144x512xf32) <- (16x144x512xf32, 512xf32) + add_66 = paddle._C_ops.add(matmul_49, parameter_181) + del parameter_181 + + # pd_op.reshape: (16x12x12x512xf32) <- (16x144x512xf32, 4xi64) + reshape_87 = paddle._C_ops.reshape(add_66, full_int_array_57) + + # pd_op.reshape: (4x2x2x12x12x512xf32) <- (16x12x12x512xf32, 6xi64) + reshape_272 = paddle._C_ops.reshape(reshape_87, full_int_array_61) + + # pd_op.transpose: (4x2x12x2x12x512xf32) <- (4x2x2x12x12x512xf32) + transpose_60 = paddle._C_ops.transpose(reshape_272, [0, 1, 3, 2, 4, 5]) + del reshape_272 + + # pd_op.reshape: (4x24x24x512xf32) <- (4x2x12x2x12x512xf32, 4xi64) + reshape_88 = paddle._C_ops.reshape(transpose_60, full_int_array_62) + + # pd_op.roll: (4x24x24x512xf32) <- (4x24x24x512xf32, 2xi64) + roll_9 = paddle._C_ops.roll(reshape_88, full_int_array_5, [1, 2]) + + # pd_op.reshape: (4x576x512xf32) <- (4x24x24x512xf32, 3xi64) + reshape_89 = paddle._C_ops.reshape(roll_9, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_10 = paddle._C_ops.full( + [], + float("0.804348"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_113 = full_10 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_16 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_191 = paddle._C_ops.add(full_10, uniform_16) + del uniform_16 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_16 = paddle._C_ops.floor(add_191) + del add_191 + + # pd_op.divide: (4x576x512xf32) <- (4x576x512xf32, xf32) + divide_16 = paddle._C_ops.divide(reshape_89, full_10) + + # pd_op.multiply: (4x576x512xf32) <- (4x576x512xf32, 4x1x1xf32) + multiply_16 = paddle._C_ops.multiply(divide_16, floor_16) + + # pd_op.add: (4x576x512xf32) <- (4x576x512xf32, 4x576x512xf32) + add_67 = paddle._C_ops.add(add_62, multiply_16) + + # pd_op.layer_norm: (4x576x512xf32, 4x576xf32, 4x576xf32) <- (4x576x512xf32, 512xf32, 512xf32) + layer_norm_66, layer_norm_67, layer_norm_68 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_67, parameter_180, parameter_179, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_179, parameter_180 + + # pd_op.matmul: (4x576x2048xf32) <- (4x576x512xf32, 512x2048xf32) + matmul_50 = paddle._C_ops.matmul(layer_norm_66, parameter_178, False, False) + del parameter_178 + + # pd_op.add: (4x576x2048xf32) <- (4x576x2048xf32, 2048xf32) + add_68 = paddle._C_ops.add(matmul_50, parameter_177) + del parameter_177 + + # pd_op.gelu: (4x576x2048xf32) <- (4x576x2048xf32) + gelu_9 = paddle._C_ops.gelu(add_68, False) + + # pd_op.matmul: (4x576x512xf32) <- (4x576x2048xf32, 2048x512xf32) + matmul_51 = paddle._C_ops.matmul(gelu_9, parameter_176, False, False) + del parameter_176 + + # pd_op.add: (4x576x512xf32) <- (4x576x512xf32, 512xf32) + add_69 = paddle._C_ops.add(matmul_51, parameter_175) + del parameter_175 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_17 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_192 = paddle._C_ops.add(full_10, uniform_17) + del uniform_17 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_17 = paddle._C_ops.floor(add_192) + del add_192 + + # pd_op.divide: (4x576x512xf32) <- (4x576x512xf32, xf32) + divide_17 = paddle._C_ops.divide(add_69, full_10) + + # pd_op.multiply: (4x576x512xf32) <- (4x576x512xf32, 4x1x1xf32) + multiply_17 = paddle._C_ops.multiply(divide_17, floor_17) + + # pd_op.add: (4x576x512xf32) <- (4x576x512xf32, 4x576x512xf32) + add_70 = paddle._C_ops.add(add_67, multiply_17) + + # pd_op.layer_norm: (4x576x512xf32, 4x576xf32, 4x576xf32) <- (4x576x512xf32, 512xf32, 512xf32) + layer_norm_69, layer_norm_70, layer_norm_71 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_70, parameter_174, parameter_173, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_173, parameter_174 + + # pd_op.reshape: (4x24x24x512xf32) <- (4x576x512xf32, 4xi64) + reshape_90 = paddle._C_ops.reshape(layer_norm_69, full_int_array_55) + + # pd_op.reshape: (4x2x12x2x12x512xf32) <- (4x24x24x512xf32, 6xi64) + reshape_273 = paddle._C_ops.reshape(reshape_90, full_int_array_56) + + # pd_op.transpose: (4x2x2x12x12x512xf32) <- (4x2x12x2x12x512xf32) + transpose_61 = paddle._C_ops.transpose(reshape_273, [0, 1, 3, 2, 4, 5]) + del reshape_273 + + # pd_op.reshape: (16x12x12x512xf32) <- (4x2x2x12x12x512xf32, 4xi64) + reshape_91 = paddle._C_ops.reshape(transpose_61, full_int_array_57) + + # pd_op.reshape: (16x144x512xf32) <- (16x12x12x512xf32, 3xi64) + reshape_92 = paddle._C_ops.reshape(reshape_91, full_int_array_58) + + # pd_op.matmul: (16x144x1536xf32) <- (16x144x512xf32, 512x1536xf32) + matmul_52 = paddle._C_ops.matmul(reshape_92, parameter_172, False, False) + del parameter_172 + + # pd_op.add: (16x144x1536xf32) <- (16x144x1536xf32, 1536xf32) + add_71 = paddle._C_ops.add(matmul_52, parameter_171) + del parameter_171 + + # pd_op.reshape: (16x144x3x16x32xf32) <- (16x144x1536xf32, 5xi64) + reshape_274 = paddle._C_ops.reshape(add_71, full_int_array_59) + + # pd_op.transpose: (3x16x16x144x32xf32) <- (16x144x3x16x32xf32) + transpose_62 = paddle._C_ops.transpose(reshape_274, [2, 0, 3, 1, 4]) + del reshape_274 + + # pd_op.slice: (16x16x144x32xf32) <- (3x16x16x144x32xf32, 1xi64, 1xi64) + slice_44 = paddle._C_ops.slice( + transpose_62, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (16x16x144x32xf32) <- (3x16x16x144x32xf32, 1xi64, 1xi64) + slice_45 = paddle._C_ops.slice( + transpose_62, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (16x16x144x32xf32) <- (3x16x16x144x32xf32, 1xi64, 1xi64) + slice_10 = paddle._C_ops.slice( + transpose_62, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (16x16x144x32xf32) <- (16x16x144x32xf32, 1xf32) + scale_10 = paddle._C_ops.scale(slice_44, full_0, float("0"), True) + del slice_44 + + # pd_op.transpose: (16x16x32x144xf32) <- (16x16x144x32xf32) + transpose_63 = paddle._C_ops.transpose(slice_45, [0, 1, 3, 2]) + del slice_45 + + # pd_op.matmul: (16x16x144x144xf32) <- (16x16x144x32xf32, 16x16x32x144xf32) + matmul_53 = paddle._C_ops.matmul(scale_10, transpose_63, False, False) + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_93 = paddle._C_ops.reshape(data_39, full_int_array_15) + del data_39 + + # pd_op.index_select: (20736x16xf32) <- (529x16xf32, 20736xi64) + index_select_10 = paddle._C_ops.index_select(data_40, reshape_93, 0) + del data_40 + + # pd_op.reshape: (144x144x16xf32) <- (20736x16xf32, 3xi64) + reshape_275 = paddle._C_ops.reshape(index_select_10, full_int_array_16) + + # pd_op.transpose: (16x144x144xf32) <- (144x144x16xf32) + transpose_64 = paddle._C_ops.transpose(reshape_275, [2, 0, 1]) + del reshape_275 + + # pd_op.unsqueeze: (1x16x144x144xf32) <- (16x144x144xf32, 1xi64) + unsqueeze_15 = paddle._C_ops.unsqueeze(transpose_64, full_int_array_0) + + # pd_op.add: (16x16x144x144xf32) <- (16x16x144x144xf32, 1x16x144x144xf32) + add_193 = paddle._C_ops.add(matmul_53, unsqueeze_15) + + # pd_op.softmax: (16x16x144x144xf32) <- (16x16x144x144xf32) + softmax_10 = paddle._C_ops.softmax(add_193, -1) + del add_193 + + # pd_op.matmul: (16x16x144x32xf32) <- (16x16x144x144xf32, 16x16x144x32xf32) + matmul_134 = paddle._C_ops.matmul(softmax_10, slice_10, False, False) + + # pd_op.transpose: (16x144x16x32xf32) <- (16x16x144x32xf32) + transpose_65 = paddle._C_ops.transpose(matmul_134, [0, 2, 1, 3]) + del matmul_134 + + # pd_op.reshape: (16x144x512xf32) <- (16x144x16x32xf32, 3xi64) + reshape_94 = paddle._C_ops.reshape(transpose_65, full_int_array_60) + + # pd_op.matmul: (16x144x512xf32) <- (16x144x512xf32, 512x512xf32) + matmul_54 = paddle._C_ops.matmul(reshape_94, parameter_170, False, False) + del parameter_170 + + # pd_op.add: (16x144x512xf32) <- (16x144x512xf32, 512xf32) + add_72 = paddle._C_ops.add(matmul_54, parameter_169) + del parameter_169 + + # pd_op.reshape: (16x12x12x512xf32) <- (16x144x512xf32, 4xi64) + reshape_95 = paddle._C_ops.reshape(add_72, full_int_array_57) + + # pd_op.reshape: (4x2x2x12x12x512xf32) <- (16x12x12x512xf32, 6xi64) + reshape_276 = paddle._C_ops.reshape(reshape_95, full_int_array_61) + + # pd_op.transpose: (4x2x12x2x12x512xf32) <- (4x2x2x12x12x512xf32) + transpose_66 = paddle._C_ops.transpose(reshape_276, [0, 1, 3, 2, 4, 5]) + del reshape_276 + + # pd_op.reshape: (4x24x24x512xf32) <- (4x2x12x2x12x512xf32, 4xi64) + reshape_96 = paddle._C_ops.reshape(transpose_66, full_int_array_62) + + # pd_op.reshape: (4x576x512xf32) <- (4x24x24x512xf32, 3xi64) + reshape_97 = paddle._C_ops.reshape(reshape_96, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_11 = paddle._C_ops.full( + [], + float("0.782609"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_122 = full_11 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_18 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_194 = paddle._C_ops.add(full_11, uniform_18) + del uniform_18 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_18 = paddle._C_ops.floor(add_194) + del add_194 + + # pd_op.divide: (4x576x512xf32) <- (4x576x512xf32, xf32) + divide_18 = paddle._C_ops.divide(reshape_97, full_11) + + # pd_op.multiply: (4x576x512xf32) <- (4x576x512xf32, 4x1x1xf32) + multiply_18 = paddle._C_ops.multiply(divide_18, floor_18) + + # pd_op.add: (4x576x512xf32) <- (4x576x512xf32, 4x576x512xf32) + add_73 = paddle._C_ops.add(add_70, multiply_18) + + # pd_op.layer_norm: (4x576x512xf32, 4x576xf32, 4x576xf32) <- (4x576x512xf32, 512xf32, 512xf32) + layer_norm_72, layer_norm_73, layer_norm_74 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_73, parameter_168, parameter_167, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_167, parameter_168 + + # pd_op.matmul: (4x576x2048xf32) <- (4x576x512xf32, 512x2048xf32) + matmul_55 = paddle._C_ops.matmul(layer_norm_72, parameter_166, False, False) + del parameter_166 + + # pd_op.add: (4x576x2048xf32) <- (4x576x2048xf32, 2048xf32) + add_74 = paddle._C_ops.add(matmul_55, parameter_165) + del parameter_165 + + # pd_op.gelu: (4x576x2048xf32) <- (4x576x2048xf32) + gelu_10 = paddle._C_ops.gelu(add_74, False) + + # pd_op.matmul: (4x576x512xf32) <- (4x576x2048xf32, 2048x512xf32) + matmul_56 = paddle._C_ops.matmul(gelu_10, parameter_164, False, False) + del parameter_164 + + # pd_op.add: (4x576x512xf32) <- (4x576x512xf32, 512xf32) + add_75 = paddle._C_ops.add(matmul_56, parameter_163) + del parameter_163 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_19 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_195 = paddle._C_ops.add(full_11, uniform_19) + del uniform_19 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_19 = paddle._C_ops.floor(add_195) + del add_195 + + # pd_op.divide: (4x576x512xf32) <- (4x576x512xf32, xf32) + divide_19 = paddle._C_ops.divide(add_75, full_11) + + # pd_op.multiply: (4x576x512xf32) <- (4x576x512xf32, 4x1x1xf32) + multiply_19 = paddle._C_ops.multiply(divide_19, floor_19) + + # pd_op.add: (4x576x512xf32) <- (4x576x512xf32, 4x576x512xf32) + add_76 = paddle._C_ops.add(add_73, multiply_19) + + # pd_op.layer_norm: (4x576x512xf32, 4x576xf32, 4x576xf32) <- (4x576x512xf32, 512xf32, 512xf32) + layer_norm_75, layer_norm_76, layer_norm_77 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_76, parameter_162, parameter_161, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_161, parameter_162 + + # pd_op.reshape: (4x24x24x512xf32) <- (4x576x512xf32, 4xi64) + reshape_98 = paddle._C_ops.reshape(layer_norm_75, full_int_array_55) + + # pd_op.roll: (4x24x24x512xf32) <- (4x24x24x512xf32, 2xi64) + roll_10 = paddle._C_ops.roll(reshape_98, full_int_array_4, [1, 2]) + + # pd_op.reshape: (4x2x12x2x12x512xf32) <- (4x24x24x512xf32, 6xi64) + reshape_277 = paddle._C_ops.reshape(roll_10, full_int_array_56) + + # pd_op.transpose: (4x2x2x12x12x512xf32) <- (4x2x12x2x12x512xf32) + transpose_67 = paddle._C_ops.transpose(reshape_277, [0, 1, 3, 2, 4, 5]) + del reshape_277 + + # pd_op.reshape: (16x12x12x512xf32) <- (4x2x2x12x12x512xf32, 4xi64) + reshape_99 = paddle._C_ops.reshape(transpose_67, full_int_array_57) + + # pd_op.reshape: (16x144x512xf32) <- (16x12x12x512xf32, 3xi64) + reshape_100 = paddle._C_ops.reshape(reshape_99, full_int_array_58) + + # pd_op.full: (1x24x24x1xf32) <- () + full_39 = paddle._C_ops.full( + [1, 24, 24, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__52 = paddle._C_ops.set_value_( + full_39, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_39 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__53 = paddle._C_ops.set_value_( + set_value__52, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__52 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__54 = paddle._C_ops.set_value_( + set_value__53, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__53 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__55 = paddle._C_ops.set_value_( + set_value__54, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__54 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__56 = paddle._C_ops.set_value_( + set_value__55, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__55 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__57 = paddle._C_ops.set_value_( + set_value__56, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__56 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__58 = paddle._C_ops.set_value_( + set_value__57, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__57 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__59 = paddle._C_ops.set_value_( + set_value__58, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__58 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__5 = paddle._C_ops.set_value_( + set_value__59, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__59 + + # pd_op.reshape: (1x2x12x2x12x1xf32) <- (1x24x24x1xf32, 6xi64) + reshape_278 = paddle._C_ops.reshape(set_value__5, full_int_array_64) + + # pd_op.transpose: (1x2x2x12x12x1xf32) <- (1x2x12x2x12x1xf32) + transpose_151 = paddle._C_ops.transpose(reshape_278, [0, 1, 3, 2, 4, 5]) + del reshape_278 + + # pd_op.reshape: (4x12x12x1xf32) <- (1x2x2x12x12x1xf32, 4xi64) + reshape_279 = paddle._C_ops.reshape(transpose_151, full_int_array_36) + del transpose_151 + + # pd_op.reshape: (4x144xf32) <- (4x12x12x1xf32, 2xi64) + reshape_280 = paddle._C_ops.reshape(reshape_279, full_int_array_37) + del reshape_279 + + # pd_op.unsqueeze: (4x1x144xf32) <- (4x144xf32, 1xi64) + unsqueeze_52 = paddle._C_ops.unsqueeze(reshape_280, full_int_array_1) + + # pd_op.unsqueeze: (4x144x1xf32) <- (4x144xf32, 1xi64) + unsqueeze_53 = paddle._C_ops.unsqueeze(reshape_280, full_int_array_2) + del reshape_280 + + # pd_op.subtract: (4x144x144xf32) <- (4x1x144xf32, 4x144x1xf32) + subtract_5 = paddle._C_ops.subtract(unsqueeze_52, unsqueeze_53) + del unsqueeze_52, unsqueeze_53 + + # pd_op.not_equal: (4x144x144xb) <- (4x144x144xf32, xf32) + not_equal_5 = paddle._C_ops.not_equal(subtract_5, full_26) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_10 = paddle._C_ops.where(not_equal_5, full_35, subtract_5) + del not_equal_5, subtract_5 + + # pd_op.equal: (4x144x144xb) <- (4x144x144xf32, xf32) + equal_5 = paddle._C_ops.equal(where_10, full_26) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_11 = paddle._C_ops.where(equal_5, full_36, where_10) + del equal_5, where_10 + + # pd_op.matmul: (16x144x1536xf32) <- (16x144x512xf32, 512x1536xf32) + matmul_57 = paddle._C_ops.matmul(reshape_100, parameter_160, False, False) + del parameter_160 + + # pd_op.add: (16x144x1536xf32) <- (16x144x1536xf32, 1536xf32) + add_77 = paddle._C_ops.add(matmul_57, parameter_159) + del parameter_159 + + # pd_op.reshape: (16x144x3x16x32xf32) <- (16x144x1536xf32, 5xi64) + reshape_281 = paddle._C_ops.reshape(add_77, full_int_array_59) + + # pd_op.transpose: (3x16x16x144x32xf32) <- (16x144x3x16x32xf32) + transpose_68 = paddle._C_ops.transpose(reshape_281, [2, 0, 3, 1, 4]) + del reshape_281 + + # pd_op.slice: (16x16x144x32xf32) <- (3x16x16x144x32xf32, 1xi64, 1xi64) + slice_46 = paddle._C_ops.slice( + transpose_68, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (16x16x144x32xf32) <- (3x16x16x144x32xf32, 1xi64, 1xi64) + slice_47 = paddle._C_ops.slice( + transpose_68, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (16x16x144x32xf32) <- (3x16x16x144x32xf32, 1xi64, 1xi64) + slice_11 = paddle._C_ops.slice( + transpose_68, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (16x16x144x32xf32) <- (16x16x144x32xf32, 1xf32) + scale_11 = paddle._C_ops.scale(slice_46, full_0, float("0"), True) + del slice_46 + + # pd_op.transpose: (16x16x32x144xf32) <- (16x16x144x32xf32) + transpose_69 = paddle._C_ops.transpose(slice_47, [0, 1, 3, 2]) + del slice_47 + + # pd_op.matmul: (16x16x144x144xf32) <- (16x16x144x32xf32, 16x16x32x144xf32) + matmul_58 = paddle._C_ops.matmul(scale_11, transpose_69, False, False) + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_101 = paddle._C_ops.reshape(data_41, full_int_array_15) + del data_41 + + # pd_op.index_select: (20736x16xf32) <- (529x16xf32, 20736xi64) + index_select_11 = paddle._C_ops.index_select(data_42, reshape_101, 0) + del data_42 + + # pd_op.reshape: (144x144x16xf32) <- (20736x16xf32, 3xi64) + reshape_282 = paddle._C_ops.reshape(index_select_11, full_int_array_16) + + # pd_op.transpose: (16x144x144xf32) <- (144x144x16xf32) + transpose_70 = paddle._C_ops.transpose(reshape_282, [2, 0, 1]) + del reshape_282 + + # pd_op.unsqueeze: (1x16x144x144xf32) <- (16x144x144xf32, 1xi64) + unsqueeze_16 = paddle._C_ops.unsqueeze(transpose_70, full_int_array_0) + + # pd_op.add: (16x16x144x144xf32) <- (16x16x144x144xf32, 1x16x144x144xf32) + add_78 = paddle._C_ops.add(matmul_58, unsqueeze_16) + + # pd_op.reshape: (4x4x16x144x144xf32) <- (16x16x144x144xf32, 5xi64) + reshape_102 = paddle._C_ops.reshape(add_78, full_int_array_65) + + # pd_op.unsqueeze: (4x1x144x144xf32) <- (4x144x144xf32, 1xi64) + unsqueeze_54 = paddle._C_ops.unsqueeze(where_11, full_int_array_1) + del where_11 + + # pd_op.unsqueeze: (1x4x1x144x144xf32) <- (4x1x144x144xf32, 1xi64) + unsqueeze_17 = paddle._C_ops.unsqueeze(unsqueeze_54, full_int_array_0) + del unsqueeze_54 + + # pd_op.add: (4x4x16x144x144xf32) <- (4x4x16x144x144xf32, 1x4x1x144x144xf32) + add_79 = paddle._C_ops.add(reshape_102, unsqueeze_17) + + # pd_op.reshape: (16x16x144x144xf32) <- (4x4x16x144x144xf32, 4xi64) + reshape_283 = paddle._C_ops.reshape(add_79, full_int_array_66) + + # pd_op.softmax: (16x16x144x144xf32) <- (16x16x144x144xf32) + softmax_11 = paddle._C_ops.softmax(reshape_283, -1) + del reshape_283 + + # pd_op.matmul: (16x16x144x32xf32) <- (16x16x144x144xf32, 16x16x144x32xf32) + matmul_135 = paddle._C_ops.matmul(softmax_11, slice_11, False, False) + + # pd_op.transpose: (16x144x16x32xf32) <- (16x16x144x32xf32) + transpose_71 = paddle._C_ops.transpose(matmul_135, [0, 2, 1, 3]) + del matmul_135 + + # pd_op.reshape: (16x144x512xf32) <- (16x144x16x32xf32, 3xi64) + reshape_103 = paddle._C_ops.reshape(transpose_71, full_int_array_60) + + # pd_op.matmul: (16x144x512xf32) <- (16x144x512xf32, 512x512xf32) + matmul_59 = paddle._C_ops.matmul(reshape_103, parameter_158, False, False) + del parameter_158 + + # pd_op.add: (16x144x512xf32) <- (16x144x512xf32, 512xf32) + add_80 = paddle._C_ops.add(matmul_59, parameter_157) + del parameter_157 + + # pd_op.reshape: (16x12x12x512xf32) <- (16x144x512xf32, 4xi64) + reshape_104 = paddle._C_ops.reshape(add_80, full_int_array_57) + + # pd_op.reshape: (4x2x2x12x12x512xf32) <- (16x12x12x512xf32, 6xi64) + reshape_284 = paddle._C_ops.reshape(reshape_104, full_int_array_61) + + # pd_op.transpose: (4x2x12x2x12x512xf32) <- (4x2x2x12x12x512xf32) + transpose_72 = paddle._C_ops.transpose(reshape_284, [0, 1, 3, 2, 4, 5]) + del reshape_284 + + # pd_op.reshape: (4x24x24x512xf32) <- (4x2x12x2x12x512xf32, 4xi64) + reshape_105 = paddle._C_ops.reshape(transpose_72, full_int_array_62) + + # pd_op.roll: (4x24x24x512xf32) <- (4x24x24x512xf32, 2xi64) + roll_11 = paddle._C_ops.roll(reshape_105, full_int_array_5, [1, 2]) + + # pd_op.reshape: (4x576x512xf32) <- (4x24x24x512xf32, 3xi64) + reshape_106 = paddle._C_ops.reshape(roll_11, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_12 = paddle._C_ops.full( + [], + float("0.76087"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_133 = full_12 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_20 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_196 = paddle._C_ops.add(full_12, uniform_20) + del uniform_20 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_20 = paddle._C_ops.floor(add_196) + del add_196 + + # pd_op.divide: (4x576x512xf32) <- (4x576x512xf32, xf32) + divide_20 = paddle._C_ops.divide(reshape_106, full_12) + + # pd_op.multiply: (4x576x512xf32) <- (4x576x512xf32, 4x1x1xf32) + multiply_20 = paddle._C_ops.multiply(divide_20, floor_20) + + # pd_op.add: (4x576x512xf32) <- (4x576x512xf32, 4x576x512xf32) + add_81 = paddle._C_ops.add(add_76, multiply_20) + + # pd_op.layer_norm: (4x576x512xf32, 4x576xf32, 4x576xf32) <- (4x576x512xf32, 512xf32, 512xf32) + layer_norm_78, layer_norm_79, layer_norm_80 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_81, parameter_156, parameter_155, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_155, parameter_156 + + # pd_op.matmul: (4x576x2048xf32) <- (4x576x512xf32, 512x2048xf32) + matmul_60 = paddle._C_ops.matmul(layer_norm_78, parameter_154, False, False) + del parameter_154 + + # pd_op.add: (4x576x2048xf32) <- (4x576x2048xf32, 2048xf32) + add_82 = paddle._C_ops.add(matmul_60, parameter_153) + del parameter_153 + + # pd_op.gelu: (4x576x2048xf32) <- (4x576x2048xf32) + gelu_11 = paddle._C_ops.gelu(add_82, False) + + # pd_op.matmul: (4x576x512xf32) <- (4x576x2048xf32, 2048x512xf32) + matmul_61 = paddle._C_ops.matmul(gelu_11, parameter_152, False, False) + del parameter_152 + + # pd_op.add: (4x576x512xf32) <- (4x576x512xf32, 512xf32) + add_83 = paddle._C_ops.add(matmul_61, parameter_151) + del parameter_151 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_21 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_197 = paddle._C_ops.add(full_12, uniform_21) + del uniform_21 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_21 = paddle._C_ops.floor(add_197) + del add_197 + + # pd_op.divide: (4x576x512xf32) <- (4x576x512xf32, xf32) + divide_21 = paddle._C_ops.divide(add_83, full_12) + + # pd_op.multiply: (4x576x512xf32) <- (4x576x512xf32, 4x1x1xf32) + multiply_21 = paddle._C_ops.multiply(divide_21, floor_21) + + # pd_op.add: (4x576x512xf32) <- (4x576x512xf32, 4x576x512xf32) + add_84 = paddle._C_ops.add(add_81, multiply_21) + + # pd_op.layer_norm: (4x576x512xf32, 4x576xf32, 4x576xf32) <- (4x576x512xf32, 512xf32, 512xf32) + layer_norm_81, layer_norm_82, layer_norm_83 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_84, parameter_150, parameter_149, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_149, parameter_150 + + # pd_op.reshape: (4x24x24x512xf32) <- (4x576x512xf32, 4xi64) + reshape_107 = paddle._C_ops.reshape(layer_norm_81, full_int_array_55) + + # pd_op.reshape: (4x2x12x2x12x512xf32) <- (4x24x24x512xf32, 6xi64) + reshape_285 = paddle._C_ops.reshape(reshape_107, full_int_array_56) + + # pd_op.transpose: (4x2x2x12x12x512xf32) <- (4x2x12x2x12x512xf32) + transpose_73 = paddle._C_ops.transpose(reshape_285, [0, 1, 3, 2, 4, 5]) + del reshape_285 + + # pd_op.reshape: (16x12x12x512xf32) <- (4x2x2x12x12x512xf32, 4xi64) + reshape_108 = paddle._C_ops.reshape(transpose_73, full_int_array_57) + + # pd_op.reshape: (16x144x512xf32) <- (16x12x12x512xf32, 3xi64) + reshape_109 = paddle._C_ops.reshape(reshape_108, full_int_array_58) + + # pd_op.matmul: (16x144x1536xf32) <- (16x144x512xf32, 512x1536xf32) + matmul_62 = paddle._C_ops.matmul(reshape_109, parameter_148, False, False) + del parameter_148 + + # pd_op.add: (16x144x1536xf32) <- (16x144x1536xf32, 1536xf32) + add_85 = paddle._C_ops.add(matmul_62, parameter_147) + del parameter_147 + + # pd_op.reshape: (16x144x3x16x32xf32) <- (16x144x1536xf32, 5xi64) + reshape_286 = paddle._C_ops.reshape(add_85, full_int_array_59) + + # pd_op.transpose: (3x16x16x144x32xf32) <- (16x144x3x16x32xf32) + transpose_74 = paddle._C_ops.transpose(reshape_286, [2, 0, 3, 1, 4]) + del reshape_286 + + # pd_op.slice: (16x16x144x32xf32) <- (3x16x16x144x32xf32, 1xi64, 1xi64) + slice_48 = paddle._C_ops.slice( + transpose_74, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (16x16x144x32xf32) <- (3x16x16x144x32xf32, 1xi64, 1xi64) + slice_49 = paddle._C_ops.slice( + transpose_74, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (16x16x144x32xf32) <- (3x16x16x144x32xf32, 1xi64, 1xi64) + slice_12 = paddle._C_ops.slice( + transpose_74, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (16x16x144x32xf32) <- (16x16x144x32xf32, 1xf32) + scale_12 = paddle._C_ops.scale(slice_48, full_0, float("0"), True) + del slice_48 + + # pd_op.transpose: (16x16x32x144xf32) <- (16x16x144x32xf32) + transpose_75 = paddle._C_ops.transpose(slice_49, [0, 1, 3, 2]) + del slice_49 + + # pd_op.matmul: (16x16x144x144xf32) <- (16x16x144x32xf32, 16x16x32x144xf32) + matmul_63 = paddle._C_ops.matmul(scale_12, transpose_75, False, False) + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_110 = paddle._C_ops.reshape(data_44, full_int_array_15) + del data_44 + + # pd_op.index_select: (20736x16xf32) <- (529x16xf32, 20736xi64) + index_select_12 = paddle._C_ops.index_select(data_45, reshape_110, 0) + del data_45 + + # pd_op.reshape: (144x144x16xf32) <- (20736x16xf32, 3xi64) + reshape_287 = paddle._C_ops.reshape(index_select_12, full_int_array_16) + + # pd_op.transpose: (16x144x144xf32) <- (144x144x16xf32) + transpose_76 = paddle._C_ops.transpose(reshape_287, [2, 0, 1]) + del reshape_287 + + # pd_op.unsqueeze: (1x16x144x144xf32) <- (16x144x144xf32, 1xi64) + unsqueeze_18 = paddle._C_ops.unsqueeze(transpose_76, full_int_array_0) + + # pd_op.add: (16x16x144x144xf32) <- (16x16x144x144xf32, 1x16x144x144xf32) + add_198 = paddle._C_ops.add(matmul_63, unsqueeze_18) + + # pd_op.softmax: (16x16x144x144xf32) <- (16x16x144x144xf32) + softmax_12 = paddle._C_ops.softmax(add_198, -1) + del add_198 + + # pd_op.matmul: (16x16x144x32xf32) <- (16x16x144x144xf32, 16x16x144x32xf32) + matmul_136 = paddle._C_ops.matmul(softmax_12, slice_12, False, False) + + # pd_op.transpose: (16x144x16x32xf32) <- (16x16x144x32xf32) + transpose_77 = paddle._C_ops.transpose(matmul_136, [0, 2, 1, 3]) + del matmul_136 + + # pd_op.reshape: (16x144x512xf32) <- (16x144x16x32xf32, 3xi64) + reshape_111 = paddle._C_ops.reshape(transpose_77, full_int_array_60) + + # pd_op.matmul: (16x144x512xf32) <- (16x144x512xf32, 512x512xf32) + matmul_64 = paddle._C_ops.matmul(reshape_111, parameter_146, False, False) + del parameter_146 + + # pd_op.add: (16x144x512xf32) <- (16x144x512xf32, 512xf32) + add_86 = paddle._C_ops.add(matmul_64, parameter_145) + del parameter_145 + + # pd_op.reshape: (16x12x12x512xf32) <- (16x144x512xf32, 4xi64) + reshape_112 = paddle._C_ops.reshape(add_86, full_int_array_57) + + # pd_op.reshape: (4x2x2x12x12x512xf32) <- (16x12x12x512xf32, 6xi64) + reshape_288 = paddle._C_ops.reshape(reshape_112, full_int_array_61) + + # pd_op.transpose: (4x2x12x2x12x512xf32) <- (4x2x2x12x12x512xf32) + transpose_78 = paddle._C_ops.transpose(reshape_288, [0, 1, 3, 2, 4, 5]) + del reshape_288 + + # pd_op.reshape: (4x24x24x512xf32) <- (4x2x12x2x12x512xf32, 4xi64) + reshape_113 = paddle._C_ops.reshape(transpose_78, full_int_array_62) + + # pd_op.reshape: (4x576x512xf32) <- (4x24x24x512xf32, 3xi64) + reshape_114 = paddle._C_ops.reshape(reshape_113, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_13 = paddle._C_ops.full( + [], + float("0.73913"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_142 = full_13 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_22 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_199 = paddle._C_ops.add(full_13, uniform_22) + del uniform_22 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_22 = paddle._C_ops.floor(add_199) + del add_199 + + # pd_op.divide: (4x576x512xf32) <- (4x576x512xf32, xf32) + divide_22 = paddle._C_ops.divide(reshape_114, full_13) + + # pd_op.multiply: (4x576x512xf32) <- (4x576x512xf32, 4x1x1xf32) + multiply_22 = paddle._C_ops.multiply(divide_22, floor_22) + + # pd_op.add: (4x576x512xf32) <- (4x576x512xf32, 4x576x512xf32) + add_87 = paddle._C_ops.add(add_84, multiply_22) + + # pd_op.layer_norm: (4x576x512xf32, 4x576xf32, 4x576xf32) <- (4x576x512xf32, 512xf32, 512xf32) + layer_norm_84, layer_norm_85, layer_norm_86 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_87, parameter_144, parameter_143, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_143, parameter_144 + + # pd_op.matmul: (4x576x2048xf32) <- (4x576x512xf32, 512x2048xf32) + matmul_65 = paddle._C_ops.matmul(layer_norm_84, parameter_142, False, False) + del parameter_142 + + # pd_op.add: (4x576x2048xf32) <- (4x576x2048xf32, 2048xf32) + add_88 = paddle._C_ops.add(matmul_65, parameter_141) + del parameter_141 + + # pd_op.gelu: (4x576x2048xf32) <- (4x576x2048xf32) + gelu_12 = paddle._C_ops.gelu(add_88, False) + + # pd_op.matmul: (4x576x512xf32) <- (4x576x2048xf32, 2048x512xf32) + matmul_66 = paddle._C_ops.matmul(gelu_12, parameter_140, False, False) + del parameter_140 + + # pd_op.add: (4x576x512xf32) <- (4x576x512xf32, 512xf32) + add_89 = paddle._C_ops.add(matmul_66, parameter_139) + del parameter_139 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_23 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_200 = paddle._C_ops.add(full_13, uniform_23) + del uniform_23 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_23 = paddle._C_ops.floor(add_200) + del add_200 + + # pd_op.divide: (4x576x512xf32) <- (4x576x512xf32, xf32) + divide_23 = paddle._C_ops.divide(add_89, full_13) + + # pd_op.multiply: (4x576x512xf32) <- (4x576x512xf32, 4x1x1xf32) + multiply_23 = paddle._C_ops.multiply(divide_23, floor_23) + + # pd_op.add: (4x576x512xf32) <- (4x576x512xf32, 4x576x512xf32) + add_90 = paddle._C_ops.add(add_87, multiply_23) + + # pd_op.layer_norm: (4x576x512xf32, 4x576xf32, 4x576xf32) <- (4x576x512xf32, 512xf32, 512xf32) + layer_norm_87, layer_norm_88, layer_norm_89 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_90, parameter_138, parameter_137, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_137, parameter_138 + + # pd_op.reshape: (4x24x24x512xf32) <- (4x576x512xf32, 4xi64) + reshape_115 = paddle._C_ops.reshape(layer_norm_87, full_int_array_55) + + # pd_op.roll: (4x24x24x512xf32) <- (4x24x24x512xf32, 2xi64) + roll_12 = paddle._C_ops.roll(reshape_115, full_int_array_4, [1, 2]) + + # pd_op.reshape: (4x2x12x2x12x512xf32) <- (4x24x24x512xf32, 6xi64) + reshape_289 = paddle._C_ops.reshape(roll_12, full_int_array_56) + + # pd_op.transpose: (4x2x2x12x12x512xf32) <- (4x2x12x2x12x512xf32) + transpose_79 = paddle._C_ops.transpose(reshape_289, [0, 1, 3, 2, 4, 5]) + del reshape_289 + + # pd_op.reshape: (16x12x12x512xf32) <- (4x2x2x12x12x512xf32, 4xi64) + reshape_116 = paddle._C_ops.reshape(transpose_79, full_int_array_57) + + # pd_op.reshape: (16x144x512xf32) <- (16x12x12x512xf32, 3xi64) + reshape_117 = paddle._C_ops.reshape(reshape_116, full_int_array_58) + + # pd_op.full: (1x24x24x1xf32) <- () + full_40 = paddle._C_ops.full( + [1, 24, 24, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__60 = paddle._C_ops.set_value_( + full_40, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_40 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__61 = paddle._C_ops.set_value_( + set_value__60, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__60 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__62 = paddle._C_ops.set_value_( + set_value__61, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__61 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__63 = paddle._C_ops.set_value_( + set_value__62, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__62 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__64 = paddle._C_ops.set_value_( + set_value__63, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__63 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__65 = paddle._C_ops.set_value_( + set_value__64, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__64 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__66 = paddle._C_ops.set_value_( + set_value__65, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__65 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__67 = paddle._C_ops.set_value_( + set_value__66, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__66 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__6 = paddle._C_ops.set_value_( + set_value__67, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__67 + + # pd_op.reshape: (1x2x12x2x12x1xf32) <- (1x24x24x1xf32, 6xi64) + reshape_290 = paddle._C_ops.reshape(set_value__6, full_int_array_64) + + # pd_op.transpose: (1x2x2x12x12x1xf32) <- (1x2x12x2x12x1xf32) + transpose_152 = paddle._C_ops.transpose(reshape_290, [0, 1, 3, 2, 4, 5]) + del reshape_290 + + # pd_op.reshape: (4x12x12x1xf32) <- (1x2x2x12x12x1xf32, 4xi64) + reshape_291 = paddle._C_ops.reshape(transpose_152, full_int_array_36) + del transpose_152 + + # pd_op.reshape: (4x144xf32) <- (4x12x12x1xf32, 2xi64) + reshape_292 = paddle._C_ops.reshape(reshape_291, full_int_array_37) + del reshape_291 + + # pd_op.unsqueeze: (4x1x144xf32) <- (4x144xf32, 1xi64) + unsqueeze_55 = paddle._C_ops.unsqueeze(reshape_292, full_int_array_1) + + # pd_op.unsqueeze: (4x144x1xf32) <- (4x144xf32, 1xi64) + unsqueeze_56 = paddle._C_ops.unsqueeze(reshape_292, full_int_array_2) + del reshape_292 + + # pd_op.subtract: (4x144x144xf32) <- (4x1x144xf32, 4x144x1xf32) + subtract_6 = paddle._C_ops.subtract(unsqueeze_55, unsqueeze_56) + del unsqueeze_55, unsqueeze_56 + + # pd_op.not_equal: (4x144x144xb) <- (4x144x144xf32, xf32) + not_equal_6 = paddle._C_ops.not_equal(subtract_6, full_26) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_12 = paddle._C_ops.where(not_equal_6, full_35, subtract_6) + del not_equal_6, subtract_6 + + # pd_op.equal: (4x144x144xb) <- (4x144x144xf32, xf32) + equal_6 = paddle._C_ops.equal(where_12, full_26) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_13 = paddle._C_ops.where(equal_6, full_36, where_12) + del equal_6, where_12 + + # pd_op.matmul: (16x144x1536xf32) <- (16x144x512xf32, 512x1536xf32) + matmul_67 = paddle._C_ops.matmul(reshape_117, parameter_136, False, False) + del parameter_136 + + # pd_op.add: (16x144x1536xf32) <- (16x144x1536xf32, 1536xf32) + add_91 = paddle._C_ops.add(matmul_67, parameter_135) + del parameter_135 + + # pd_op.reshape: (16x144x3x16x32xf32) <- (16x144x1536xf32, 5xi64) + reshape_293 = paddle._C_ops.reshape(add_91, full_int_array_59) + + # pd_op.transpose: (3x16x16x144x32xf32) <- (16x144x3x16x32xf32) + transpose_80 = paddle._C_ops.transpose(reshape_293, [2, 0, 3, 1, 4]) + del reshape_293 + + # pd_op.slice: (16x16x144x32xf32) <- (3x16x16x144x32xf32, 1xi64, 1xi64) + slice_50 = paddle._C_ops.slice( + transpose_80, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (16x16x144x32xf32) <- (3x16x16x144x32xf32, 1xi64, 1xi64) + slice_51 = paddle._C_ops.slice( + transpose_80, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (16x16x144x32xf32) <- (3x16x16x144x32xf32, 1xi64, 1xi64) + slice_13 = paddle._C_ops.slice( + transpose_80, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (16x16x144x32xf32) <- (16x16x144x32xf32, 1xf32) + scale_13 = paddle._C_ops.scale(slice_50, full_0, float("0"), True) + del slice_50 + + # pd_op.transpose: (16x16x32x144xf32) <- (16x16x144x32xf32) + transpose_81 = paddle._C_ops.transpose(slice_51, [0, 1, 3, 2]) + del slice_51 + + # pd_op.matmul: (16x16x144x144xf32) <- (16x16x144x32xf32, 16x16x32x144xf32) + matmul_68 = paddle._C_ops.matmul(scale_13, transpose_81, False, False) + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_118 = paddle._C_ops.reshape(data_47, full_int_array_15) + del data_47 + + # pd_op.index_select: (20736x16xf32) <- (529x16xf32, 20736xi64) + index_select_13 = paddle._C_ops.index_select(data_48, reshape_118, 0) + del data_48 + + # pd_op.reshape: (144x144x16xf32) <- (20736x16xf32, 3xi64) + reshape_294 = paddle._C_ops.reshape(index_select_13, full_int_array_16) + + # pd_op.transpose: (16x144x144xf32) <- (144x144x16xf32) + transpose_82 = paddle._C_ops.transpose(reshape_294, [2, 0, 1]) + del reshape_294 + + # pd_op.unsqueeze: (1x16x144x144xf32) <- (16x144x144xf32, 1xi64) + unsqueeze_19 = paddle._C_ops.unsqueeze(transpose_82, full_int_array_0) + + # pd_op.add: (16x16x144x144xf32) <- (16x16x144x144xf32, 1x16x144x144xf32) + add_92 = paddle._C_ops.add(matmul_68, unsqueeze_19) + + # pd_op.reshape: (4x4x16x144x144xf32) <- (16x16x144x144xf32, 5xi64) + reshape_119 = paddle._C_ops.reshape(add_92, full_int_array_65) + + # pd_op.unsqueeze: (4x1x144x144xf32) <- (4x144x144xf32, 1xi64) + unsqueeze_57 = paddle._C_ops.unsqueeze(where_13, full_int_array_1) + del where_13 + + # pd_op.unsqueeze: (1x4x1x144x144xf32) <- (4x1x144x144xf32, 1xi64) + unsqueeze_20 = paddle._C_ops.unsqueeze(unsqueeze_57, full_int_array_0) + del unsqueeze_57 + + # pd_op.add: (4x4x16x144x144xf32) <- (4x4x16x144x144xf32, 1x4x1x144x144xf32) + add_93 = paddle._C_ops.add(reshape_119, unsqueeze_20) + + # pd_op.reshape: (16x16x144x144xf32) <- (4x4x16x144x144xf32, 4xi64) + reshape_295 = paddle._C_ops.reshape(add_93, full_int_array_66) + + # pd_op.softmax: (16x16x144x144xf32) <- (16x16x144x144xf32) + softmax_13 = paddle._C_ops.softmax(reshape_295, -1) + del reshape_295 + + # pd_op.matmul: (16x16x144x32xf32) <- (16x16x144x144xf32, 16x16x144x32xf32) + matmul_137 = paddle._C_ops.matmul(softmax_13, slice_13, False, False) + + # pd_op.transpose: (16x144x16x32xf32) <- (16x16x144x32xf32) + transpose_83 = paddle._C_ops.transpose(matmul_137, [0, 2, 1, 3]) + del matmul_137 + + # pd_op.reshape: (16x144x512xf32) <- (16x144x16x32xf32, 3xi64) + reshape_120 = paddle._C_ops.reshape(transpose_83, full_int_array_60) + + # pd_op.matmul: (16x144x512xf32) <- (16x144x512xf32, 512x512xf32) + matmul_69 = paddle._C_ops.matmul(reshape_120, parameter_134, False, False) + del parameter_134 + + # pd_op.add: (16x144x512xf32) <- (16x144x512xf32, 512xf32) + add_94 = paddle._C_ops.add(matmul_69, parameter_133) + del parameter_133 + + # pd_op.reshape: (16x12x12x512xf32) <- (16x144x512xf32, 4xi64) + reshape_121 = paddle._C_ops.reshape(add_94, full_int_array_57) + + # pd_op.reshape: (4x2x2x12x12x512xf32) <- (16x12x12x512xf32, 6xi64) + reshape_296 = paddle._C_ops.reshape(reshape_121, full_int_array_61) + + # pd_op.transpose: (4x2x12x2x12x512xf32) <- (4x2x2x12x12x512xf32) + transpose_84 = paddle._C_ops.transpose(reshape_296, [0, 1, 3, 2, 4, 5]) + del reshape_296 + + # pd_op.reshape: (4x24x24x512xf32) <- (4x2x12x2x12x512xf32, 4xi64) + reshape_122 = paddle._C_ops.reshape(transpose_84, full_int_array_62) + + # pd_op.roll: (4x24x24x512xf32) <- (4x24x24x512xf32, 2xi64) + roll_13 = paddle._C_ops.roll(reshape_122, full_int_array_5, [1, 2]) + + # pd_op.reshape: (4x576x512xf32) <- (4x24x24x512xf32, 3xi64) + reshape_123 = paddle._C_ops.reshape(roll_13, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_14 = paddle._C_ops.full( + [], + float("0.717391"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_153 = full_14 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_24 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_201 = paddle._C_ops.add(full_14, uniform_24) + del uniform_24 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_24 = paddle._C_ops.floor(add_201) + del add_201 + + # pd_op.divide: (4x576x512xf32) <- (4x576x512xf32, xf32) + divide_24 = paddle._C_ops.divide(reshape_123, full_14) + + # pd_op.multiply: (4x576x512xf32) <- (4x576x512xf32, 4x1x1xf32) + multiply_24 = paddle._C_ops.multiply(divide_24, floor_24) + + # pd_op.add: (4x576x512xf32) <- (4x576x512xf32, 4x576x512xf32) + add_95 = paddle._C_ops.add(add_90, multiply_24) + + # pd_op.layer_norm: (4x576x512xf32, 4x576xf32, 4x576xf32) <- (4x576x512xf32, 512xf32, 512xf32) + layer_norm_90, layer_norm_91, layer_norm_92 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_95, parameter_132, parameter_131, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_131, parameter_132 + + # pd_op.matmul: (4x576x2048xf32) <- (4x576x512xf32, 512x2048xf32) + matmul_70 = paddle._C_ops.matmul(layer_norm_90, parameter_130, False, False) + del parameter_130 + + # pd_op.add: (4x576x2048xf32) <- (4x576x2048xf32, 2048xf32) + add_96 = paddle._C_ops.add(matmul_70, parameter_129) + del parameter_129 + + # pd_op.gelu: (4x576x2048xf32) <- (4x576x2048xf32) + gelu_13 = paddle._C_ops.gelu(add_96, False) + + # pd_op.matmul: (4x576x512xf32) <- (4x576x2048xf32, 2048x512xf32) + matmul_71 = paddle._C_ops.matmul(gelu_13, parameter_128, False, False) + del parameter_128 + + # pd_op.add: (4x576x512xf32) <- (4x576x512xf32, 512xf32) + add_97 = paddle._C_ops.add(matmul_71, parameter_127) + del parameter_127 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_25 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_202 = paddle._C_ops.add(full_14, uniform_25) + del uniform_25 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_25 = paddle._C_ops.floor(add_202) + del add_202 + + # pd_op.divide: (4x576x512xf32) <- (4x576x512xf32, xf32) + divide_25 = paddle._C_ops.divide(add_97, full_14) + + # pd_op.multiply: (4x576x512xf32) <- (4x576x512xf32, 4x1x1xf32) + multiply_25 = paddle._C_ops.multiply(divide_25, floor_25) + + # pd_op.add: (4x576x512xf32) <- (4x576x512xf32, 4x576x512xf32) + add_98 = paddle._C_ops.add(add_95, multiply_25) + + # pd_op.layer_norm: (4x576x512xf32, 4x576xf32, 4x576xf32) <- (4x576x512xf32, 512xf32, 512xf32) + layer_norm_93, layer_norm_94, layer_norm_95 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_98, parameter_126, parameter_125, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_125, parameter_126 + + # pd_op.reshape: (4x24x24x512xf32) <- (4x576x512xf32, 4xi64) + reshape_124 = paddle._C_ops.reshape(layer_norm_93, full_int_array_55) + + # pd_op.reshape: (4x2x12x2x12x512xf32) <- (4x24x24x512xf32, 6xi64) + reshape_297 = paddle._C_ops.reshape(reshape_124, full_int_array_56) + + # pd_op.transpose: (4x2x2x12x12x512xf32) <- (4x2x12x2x12x512xf32) + transpose_85 = paddle._C_ops.transpose(reshape_297, [0, 1, 3, 2, 4, 5]) + del reshape_297 + + # pd_op.reshape: (16x12x12x512xf32) <- (4x2x2x12x12x512xf32, 4xi64) + reshape_125 = paddle._C_ops.reshape(transpose_85, full_int_array_57) + + # pd_op.reshape: (16x144x512xf32) <- (16x12x12x512xf32, 3xi64) + reshape_126 = paddle._C_ops.reshape(reshape_125, full_int_array_58) + + # pd_op.matmul: (16x144x1536xf32) <- (16x144x512xf32, 512x1536xf32) + matmul_72 = paddle._C_ops.matmul(reshape_126, parameter_124, False, False) + del parameter_124 + + # pd_op.add: (16x144x1536xf32) <- (16x144x1536xf32, 1536xf32) + add_99 = paddle._C_ops.add(matmul_72, parameter_123) + del parameter_123 + + # pd_op.reshape: (16x144x3x16x32xf32) <- (16x144x1536xf32, 5xi64) + reshape_298 = paddle._C_ops.reshape(add_99, full_int_array_59) + + # pd_op.transpose: (3x16x16x144x32xf32) <- (16x144x3x16x32xf32) + transpose_86 = paddle._C_ops.transpose(reshape_298, [2, 0, 3, 1, 4]) + del reshape_298 + + # pd_op.slice: (16x16x144x32xf32) <- (3x16x16x144x32xf32, 1xi64, 1xi64) + slice_52 = paddle._C_ops.slice( + transpose_86, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (16x16x144x32xf32) <- (3x16x16x144x32xf32, 1xi64, 1xi64) + slice_53 = paddle._C_ops.slice( + transpose_86, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (16x16x144x32xf32) <- (3x16x16x144x32xf32, 1xi64, 1xi64) + slice_14 = paddle._C_ops.slice( + transpose_86, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (16x16x144x32xf32) <- (16x16x144x32xf32, 1xf32) + scale_14 = paddle._C_ops.scale(slice_52, full_0, float("0"), True) + del slice_52 + + # pd_op.transpose: (16x16x32x144xf32) <- (16x16x144x32xf32) + transpose_87 = paddle._C_ops.transpose(slice_53, [0, 1, 3, 2]) + del slice_53 + + # pd_op.matmul: (16x16x144x144xf32) <- (16x16x144x32xf32, 16x16x32x144xf32) + matmul_73 = paddle._C_ops.matmul(scale_14, transpose_87, False, False) + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_127 = paddle._C_ops.reshape(data_1, full_int_array_15) + del data_1 + + # pd_op.index_select: (20736x16xf32) <- (529x16xf32, 20736xi64) + index_select_14 = paddle._C_ops.index_select(data_2, reshape_127, 0) + del data_2 + + # pd_op.reshape: (144x144x16xf32) <- (20736x16xf32, 3xi64) + reshape_299 = paddle._C_ops.reshape(index_select_14, full_int_array_16) + + # pd_op.transpose: (16x144x144xf32) <- (144x144x16xf32) + transpose_88 = paddle._C_ops.transpose(reshape_299, [2, 0, 1]) + del reshape_299 + + # pd_op.unsqueeze: (1x16x144x144xf32) <- (16x144x144xf32, 1xi64) + unsqueeze_21 = paddle._C_ops.unsqueeze(transpose_88, full_int_array_0) + + # pd_op.add: (16x16x144x144xf32) <- (16x16x144x144xf32, 1x16x144x144xf32) + add_203 = paddle._C_ops.add(matmul_73, unsqueeze_21) + + # pd_op.softmax: (16x16x144x144xf32) <- (16x16x144x144xf32) + softmax_14 = paddle._C_ops.softmax(add_203, -1) + del add_203 + + # pd_op.matmul: (16x16x144x32xf32) <- (16x16x144x144xf32, 16x16x144x32xf32) + matmul_138 = paddle._C_ops.matmul(softmax_14, slice_14, False, False) + + # pd_op.transpose: (16x144x16x32xf32) <- (16x16x144x32xf32) + transpose_89 = paddle._C_ops.transpose(matmul_138, [0, 2, 1, 3]) + del matmul_138 + + # pd_op.reshape: (16x144x512xf32) <- (16x144x16x32xf32, 3xi64) + reshape_128 = paddle._C_ops.reshape(transpose_89, full_int_array_60) + + # pd_op.matmul: (16x144x512xf32) <- (16x144x512xf32, 512x512xf32) + matmul_74 = paddle._C_ops.matmul(reshape_128, parameter_122, False, False) + del parameter_122 + + # pd_op.add: (16x144x512xf32) <- (16x144x512xf32, 512xf32) + add_100 = paddle._C_ops.add(matmul_74, parameter_121) + del parameter_121 + + # pd_op.reshape: (16x12x12x512xf32) <- (16x144x512xf32, 4xi64) + reshape_129 = paddle._C_ops.reshape(add_100, full_int_array_57) + + # pd_op.reshape: (4x2x2x12x12x512xf32) <- (16x12x12x512xf32, 6xi64) + reshape_300 = paddle._C_ops.reshape(reshape_129, full_int_array_61) + + # pd_op.transpose: (4x2x12x2x12x512xf32) <- (4x2x2x12x12x512xf32) + transpose_90 = paddle._C_ops.transpose(reshape_300, [0, 1, 3, 2, 4, 5]) + del reshape_300 + + # pd_op.reshape: (4x24x24x512xf32) <- (4x2x12x2x12x512xf32, 4xi64) + reshape_130 = paddle._C_ops.reshape(transpose_90, full_int_array_62) + + # pd_op.reshape: (4x576x512xf32) <- (4x24x24x512xf32, 3xi64) + reshape_131 = paddle._C_ops.reshape(reshape_130, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_15 = paddle._C_ops.full( + [], + float("0.695652"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_162 = full_15 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_26 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_204 = paddle._C_ops.add(full_15, uniform_26) + del uniform_26 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_26 = paddle._C_ops.floor(add_204) + del add_204 + + # pd_op.divide: (4x576x512xf32) <- (4x576x512xf32, xf32) + divide_26 = paddle._C_ops.divide(reshape_131, full_15) + + # pd_op.multiply: (4x576x512xf32) <- (4x576x512xf32, 4x1x1xf32) + multiply_26 = paddle._C_ops.multiply(divide_26, floor_26) + + # pd_op.add: (4x576x512xf32) <- (4x576x512xf32, 4x576x512xf32) + add_101 = paddle._C_ops.add(add_98, multiply_26) + + # pd_op.layer_norm: (4x576x512xf32, 4x576xf32, 4x576xf32) <- (4x576x512xf32, 512xf32, 512xf32) + layer_norm_96, layer_norm_97, layer_norm_98 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_101, parameter_120, parameter_119, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_119, parameter_120 + + # pd_op.matmul: (4x576x2048xf32) <- (4x576x512xf32, 512x2048xf32) + matmul_75 = paddle._C_ops.matmul(layer_norm_96, parameter_118, False, False) + del parameter_118 + + # pd_op.add: (4x576x2048xf32) <- (4x576x2048xf32, 2048xf32) + add_102 = paddle._C_ops.add(matmul_75, parameter_117) + del parameter_117 + + # pd_op.gelu: (4x576x2048xf32) <- (4x576x2048xf32) + gelu_14 = paddle._C_ops.gelu(add_102, False) + + # pd_op.matmul: (4x576x512xf32) <- (4x576x2048xf32, 2048x512xf32) + matmul_76 = paddle._C_ops.matmul(gelu_14, parameter_116, False, False) + del parameter_116 + + # pd_op.add: (4x576x512xf32) <- (4x576x512xf32, 512xf32) + add_103 = paddle._C_ops.add(matmul_76, parameter_115) + del parameter_115 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_27 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_205 = paddle._C_ops.add(full_15, uniform_27) + del uniform_27 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_27 = paddle._C_ops.floor(add_205) + del add_205 + + # pd_op.divide: (4x576x512xf32) <- (4x576x512xf32, xf32) + divide_27 = paddle._C_ops.divide(add_103, full_15) + + # pd_op.multiply: (4x576x512xf32) <- (4x576x512xf32, 4x1x1xf32) + multiply_27 = paddle._C_ops.multiply(divide_27, floor_27) + + # pd_op.add: (4x576x512xf32) <- (4x576x512xf32, 4x576x512xf32) + add_104 = paddle._C_ops.add(add_101, multiply_27) + + # pd_op.layer_norm: (4x576x512xf32, 4x576xf32, 4x576xf32) <- (4x576x512xf32, 512xf32, 512xf32) + layer_norm_99, layer_norm_100, layer_norm_101 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_104, parameter_114, parameter_113, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_113, parameter_114 + + # pd_op.reshape: (4x24x24x512xf32) <- (4x576x512xf32, 4xi64) + reshape_132 = paddle._C_ops.reshape(layer_norm_99, full_int_array_55) + + # pd_op.roll: (4x24x24x512xf32) <- (4x24x24x512xf32, 2xi64) + roll_14 = paddle._C_ops.roll(reshape_132, full_int_array_4, [1, 2]) + + # pd_op.reshape: (4x2x12x2x12x512xf32) <- (4x24x24x512xf32, 6xi64) + reshape_301 = paddle._C_ops.reshape(roll_14, full_int_array_56) + + # pd_op.transpose: (4x2x2x12x12x512xf32) <- (4x2x12x2x12x512xf32) + transpose_91 = paddle._C_ops.transpose(reshape_301, [0, 1, 3, 2, 4, 5]) + del reshape_301 + + # pd_op.reshape: (16x12x12x512xf32) <- (4x2x2x12x12x512xf32, 4xi64) + reshape_133 = paddle._C_ops.reshape(transpose_91, full_int_array_57) + + # pd_op.reshape: (16x144x512xf32) <- (16x12x12x512xf32, 3xi64) + reshape_134 = paddle._C_ops.reshape(reshape_133, full_int_array_58) + + # pd_op.full: (1x24x24x1xf32) <- () + full_41 = paddle._C_ops.full( + [1, 24, 24, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__68 = paddle._C_ops.set_value_( + full_41, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_41 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__69 = paddle._C_ops.set_value_( + set_value__68, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__68 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__70 = paddle._C_ops.set_value_( + set_value__69, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__69 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__71 = paddle._C_ops.set_value_( + set_value__70, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__70 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__72 = paddle._C_ops.set_value_( + set_value__71, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__71 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__73 = paddle._C_ops.set_value_( + set_value__72, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__72 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__74 = paddle._C_ops.set_value_( + set_value__73, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__73 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__75 = paddle._C_ops.set_value_( + set_value__74, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__74 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__7 = paddle._C_ops.set_value_( + set_value__75, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__75 + + # pd_op.reshape: (1x2x12x2x12x1xf32) <- (1x24x24x1xf32, 6xi64) + reshape_302 = paddle._C_ops.reshape(set_value__7, full_int_array_64) + + # pd_op.transpose: (1x2x2x12x12x1xf32) <- (1x2x12x2x12x1xf32) + transpose_153 = paddle._C_ops.transpose(reshape_302, [0, 1, 3, 2, 4, 5]) + del reshape_302 + + # pd_op.reshape: (4x12x12x1xf32) <- (1x2x2x12x12x1xf32, 4xi64) + reshape_303 = paddle._C_ops.reshape(transpose_153, full_int_array_36) + del transpose_153 + + # pd_op.reshape: (4x144xf32) <- (4x12x12x1xf32, 2xi64) + reshape_304 = paddle._C_ops.reshape(reshape_303, full_int_array_37) + del reshape_303 + + # pd_op.unsqueeze: (4x1x144xf32) <- (4x144xf32, 1xi64) + unsqueeze_58 = paddle._C_ops.unsqueeze(reshape_304, full_int_array_1) + + # pd_op.unsqueeze: (4x144x1xf32) <- (4x144xf32, 1xi64) + unsqueeze_59 = paddle._C_ops.unsqueeze(reshape_304, full_int_array_2) + del reshape_304 + + # pd_op.subtract: (4x144x144xf32) <- (4x1x144xf32, 4x144x1xf32) + subtract_7 = paddle._C_ops.subtract(unsqueeze_58, unsqueeze_59) + del unsqueeze_58, unsqueeze_59 + + # pd_op.not_equal: (4x144x144xb) <- (4x144x144xf32, xf32) + not_equal_7 = paddle._C_ops.not_equal(subtract_7, full_26) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_14 = paddle._C_ops.where(not_equal_7, full_35, subtract_7) + del not_equal_7, subtract_7 + + # pd_op.equal: (4x144x144xb) <- (4x144x144xf32, xf32) + equal_7 = paddle._C_ops.equal(where_14, full_26) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_15 = paddle._C_ops.where(equal_7, full_36, where_14) + del equal_7, where_14 + + # pd_op.matmul: (16x144x1536xf32) <- (16x144x512xf32, 512x1536xf32) + matmul_77 = paddle._C_ops.matmul(reshape_134, parameter_112, False, False) + del parameter_112 + + # pd_op.add: (16x144x1536xf32) <- (16x144x1536xf32, 1536xf32) + add_105 = paddle._C_ops.add(matmul_77, parameter_111) + del parameter_111 + + # pd_op.reshape: (16x144x3x16x32xf32) <- (16x144x1536xf32, 5xi64) + reshape_305 = paddle._C_ops.reshape(add_105, full_int_array_59) + + # pd_op.transpose: (3x16x16x144x32xf32) <- (16x144x3x16x32xf32) + transpose_92 = paddle._C_ops.transpose(reshape_305, [2, 0, 3, 1, 4]) + del reshape_305 + + # pd_op.slice: (16x16x144x32xf32) <- (3x16x16x144x32xf32, 1xi64, 1xi64) + slice_54 = paddle._C_ops.slice( + transpose_92, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (16x16x144x32xf32) <- (3x16x16x144x32xf32, 1xi64, 1xi64) + slice_55 = paddle._C_ops.slice( + transpose_92, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (16x16x144x32xf32) <- (3x16x16x144x32xf32, 1xi64, 1xi64) + slice_15 = paddle._C_ops.slice( + transpose_92, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (16x16x144x32xf32) <- (16x16x144x32xf32, 1xf32) + scale_15 = paddle._C_ops.scale(slice_54, full_0, float("0"), True) + del slice_54 + + # pd_op.transpose: (16x16x32x144xf32) <- (16x16x144x32xf32) + transpose_93 = paddle._C_ops.transpose(slice_55, [0, 1, 3, 2]) + del slice_55 + + # pd_op.matmul: (16x16x144x144xf32) <- (16x16x144x32xf32, 16x16x32x144xf32) + matmul_78 = paddle._C_ops.matmul(scale_15, transpose_93, False, False) + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_135 = paddle._C_ops.reshape(data_3, full_int_array_15) + del data_3 + + # pd_op.index_select: (20736x16xf32) <- (529x16xf32, 20736xi64) + index_select_15 = paddle._C_ops.index_select(data_4, reshape_135, 0) + del data_4 + + # pd_op.reshape: (144x144x16xf32) <- (20736x16xf32, 3xi64) + reshape_306 = paddle._C_ops.reshape(index_select_15, full_int_array_16) + + # pd_op.transpose: (16x144x144xf32) <- (144x144x16xf32) + transpose_94 = paddle._C_ops.transpose(reshape_306, [2, 0, 1]) + del reshape_306 + + # pd_op.unsqueeze: (1x16x144x144xf32) <- (16x144x144xf32, 1xi64) + unsqueeze_22 = paddle._C_ops.unsqueeze(transpose_94, full_int_array_0) + + # pd_op.add: (16x16x144x144xf32) <- (16x16x144x144xf32, 1x16x144x144xf32) + add_106 = paddle._C_ops.add(matmul_78, unsqueeze_22) + + # pd_op.reshape: (4x4x16x144x144xf32) <- (16x16x144x144xf32, 5xi64) + reshape_136 = paddle._C_ops.reshape(add_106, full_int_array_65) + + # pd_op.unsqueeze: (4x1x144x144xf32) <- (4x144x144xf32, 1xi64) + unsqueeze_60 = paddle._C_ops.unsqueeze(where_15, full_int_array_1) + del where_15 + + # pd_op.unsqueeze: (1x4x1x144x144xf32) <- (4x1x144x144xf32, 1xi64) + unsqueeze_23 = paddle._C_ops.unsqueeze(unsqueeze_60, full_int_array_0) + del unsqueeze_60 + + # pd_op.add: (4x4x16x144x144xf32) <- (4x4x16x144x144xf32, 1x4x1x144x144xf32) + add_107 = paddle._C_ops.add(reshape_136, unsqueeze_23) + + # pd_op.reshape: (16x16x144x144xf32) <- (4x4x16x144x144xf32, 4xi64) + reshape_307 = paddle._C_ops.reshape(add_107, full_int_array_66) + + # pd_op.softmax: (16x16x144x144xf32) <- (16x16x144x144xf32) + softmax_15 = paddle._C_ops.softmax(reshape_307, -1) + del reshape_307 + + # pd_op.matmul: (16x16x144x32xf32) <- (16x16x144x144xf32, 16x16x144x32xf32) + matmul_139 = paddle._C_ops.matmul(softmax_15, slice_15, False, False) + + # pd_op.transpose: (16x144x16x32xf32) <- (16x16x144x32xf32) + transpose_95 = paddle._C_ops.transpose(matmul_139, [0, 2, 1, 3]) + del matmul_139 + + # pd_op.reshape: (16x144x512xf32) <- (16x144x16x32xf32, 3xi64) + reshape_137 = paddle._C_ops.reshape(transpose_95, full_int_array_60) + + # pd_op.matmul: (16x144x512xf32) <- (16x144x512xf32, 512x512xf32) + matmul_79 = paddle._C_ops.matmul(reshape_137, parameter_110, False, False) + del parameter_110 + + # pd_op.add: (16x144x512xf32) <- (16x144x512xf32, 512xf32) + add_108 = paddle._C_ops.add(matmul_79, parameter_109) + del parameter_109 + + # pd_op.reshape: (16x12x12x512xf32) <- (16x144x512xf32, 4xi64) + reshape_138 = paddle._C_ops.reshape(add_108, full_int_array_57) + + # pd_op.reshape: (4x2x2x12x12x512xf32) <- (16x12x12x512xf32, 6xi64) + reshape_308 = paddle._C_ops.reshape(reshape_138, full_int_array_61) + + # pd_op.transpose: (4x2x12x2x12x512xf32) <- (4x2x2x12x12x512xf32) + transpose_96 = paddle._C_ops.transpose(reshape_308, [0, 1, 3, 2, 4, 5]) + del reshape_308 + + # pd_op.reshape: (4x24x24x512xf32) <- (4x2x12x2x12x512xf32, 4xi64) + reshape_139 = paddle._C_ops.reshape(transpose_96, full_int_array_62) + + # pd_op.roll: (4x24x24x512xf32) <- (4x24x24x512xf32, 2xi64) + roll_15 = paddle._C_ops.roll(reshape_139, full_int_array_5, [1, 2]) + + # pd_op.reshape: (4x576x512xf32) <- (4x24x24x512xf32, 3xi64) + reshape_140 = paddle._C_ops.reshape(roll_15, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_16 = paddle._C_ops.full( + [], + float("0.673913"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_173 = full_16 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_28 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_206 = paddle._C_ops.add(full_16, uniform_28) + del uniform_28 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_28 = paddle._C_ops.floor(add_206) + del add_206 + + # pd_op.divide: (4x576x512xf32) <- (4x576x512xf32, xf32) + divide_28 = paddle._C_ops.divide(reshape_140, full_16) + + # pd_op.multiply: (4x576x512xf32) <- (4x576x512xf32, 4x1x1xf32) + multiply_28 = paddle._C_ops.multiply(divide_28, floor_28) + + # pd_op.add: (4x576x512xf32) <- (4x576x512xf32, 4x576x512xf32) + add_109 = paddle._C_ops.add(add_104, multiply_28) + + # pd_op.layer_norm: (4x576x512xf32, 4x576xf32, 4x576xf32) <- (4x576x512xf32, 512xf32, 512xf32) + layer_norm_102, layer_norm_103, layer_norm_104 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_109, parameter_108, parameter_107, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_107, parameter_108 + + # pd_op.matmul: (4x576x2048xf32) <- (4x576x512xf32, 512x2048xf32) + matmul_80 = paddle._C_ops.matmul(layer_norm_102, parameter_106, False, False) + del parameter_106 + + # pd_op.add: (4x576x2048xf32) <- (4x576x2048xf32, 2048xf32) + add_110 = paddle._C_ops.add(matmul_80, parameter_105) + del parameter_105 + + # pd_op.gelu: (4x576x2048xf32) <- (4x576x2048xf32) + gelu_15 = paddle._C_ops.gelu(add_110, False) + + # pd_op.matmul: (4x576x512xf32) <- (4x576x2048xf32, 2048x512xf32) + matmul_81 = paddle._C_ops.matmul(gelu_15, parameter_104, False, False) + del parameter_104 + + # pd_op.add: (4x576x512xf32) <- (4x576x512xf32, 512xf32) + add_111 = paddle._C_ops.add(matmul_81, parameter_103) + del parameter_103 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_29 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_207 = paddle._C_ops.add(full_16, uniform_29) + del uniform_29 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_29 = paddle._C_ops.floor(add_207) + del add_207 + + # pd_op.divide: (4x576x512xf32) <- (4x576x512xf32, xf32) + divide_29 = paddle._C_ops.divide(add_111, full_16) + + # pd_op.multiply: (4x576x512xf32) <- (4x576x512xf32, 4x1x1xf32) + multiply_29 = paddle._C_ops.multiply(divide_29, floor_29) + + # pd_op.add: (4x576x512xf32) <- (4x576x512xf32, 4x576x512xf32) + add_112 = paddle._C_ops.add(add_109, multiply_29) + + # pd_op.layer_norm: (4x576x512xf32, 4x576xf32, 4x576xf32) <- (4x576x512xf32, 512xf32, 512xf32) + layer_norm_105, layer_norm_106, layer_norm_107 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_112, parameter_102, parameter_101, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_101, parameter_102 + + # pd_op.reshape: (4x24x24x512xf32) <- (4x576x512xf32, 4xi64) + reshape_141 = paddle._C_ops.reshape(layer_norm_105, full_int_array_55) + + # pd_op.reshape: (4x2x12x2x12x512xf32) <- (4x24x24x512xf32, 6xi64) + reshape_309 = paddle._C_ops.reshape(reshape_141, full_int_array_56) + + # pd_op.transpose: (4x2x2x12x12x512xf32) <- (4x2x12x2x12x512xf32) + transpose_97 = paddle._C_ops.transpose(reshape_309, [0, 1, 3, 2, 4, 5]) + del reshape_309 + + # pd_op.reshape: (16x12x12x512xf32) <- (4x2x2x12x12x512xf32, 4xi64) + reshape_142 = paddle._C_ops.reshape(transpose_97, full_int_array_57) + + # pd_op.reshape: (16x144x512xf32) <- (16x12x12x512xf32, 3xi64) + reshape_143 = paddle._C_ops.reshape(reshape_142, full_int_array_58) + + # pd_op.matmul: (16x144x1536xf32) <- (16x144x512xf32, 512x1536xf32) + matmul_82 = paddle._C_ops.matmul(reshape_143, parameter_100, False, False) + del parameter_100 + + # pd_op.add: (16x144x1536xf32) <- (16x144x1536xf32, 1536xf32) + add_113 = paddle._C_ops.add(matmul_82, parameter_99) + del parameter_99 + + # pd_op.reshape: (16x144x3x16x32xf32) <- (16x144x1536xf32, 5xi64) + reshape_310 = paddle._C_ops.reshape(add_113, full_int_array_59) + + # pd_op.transpose: (3x16x16x144x32xf32) <- (16x144x3x16x32xf32) + transpose_98 = paddle._C_ops.transpose(reshape_310, [2, 0, 3, 1, 4]) + del reshape_310 + + # pd_op.slice: (16x16x144x32xf32) <- (3x16x16x144x32xf32, 1xi64, 1xi64) + slice_56 = paddle._C_ops.slice( + transpose_98, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (16x16x144x32xf32) <- (3x16x16x144x32xf32, 1xi64, 1xi64) + slice_57 = paddle._C_ops.slice( + transpose_98, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (16x16x144x32xf32) <- (3x16x16x144x32xf32, 1xi64, 1xi64) + slice_16 = paddle._C_ops.slice( + transpose_98, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (16x16x144x32xf32) <- (16x16x144x32xf32, 1xf32) + scale_16 = paddle._C_ops.scale(slice_56, full_0, float("0"), True) + del slice_56 + + # pd_op.transpose: (16x16x32x144xf32) <- (16x16x144x32xf32) + transpose_99 = paddle._C_ops.transpose(slice_57, [0, 1, 3, 2]) + del slice_57 + + # pd_op.matmul: (16x16x144x144xf32) <- (16x16x144x32xf32, 16x16x32x144xf32) + matmul_83 = paddle._C_ops.matmul(scale_16, transpose_99, False, False) + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_144 = paddle._C_ops.reshape(data_5, full_int_array_15) + del data_5 + + # pd_op.index_select: (20736x16xf32) <- (529x16xf32, 20736xi64) + index_select_16 = paddle._C_ops.index_select(data_6, reshape_144, 0) + del data_6 + + # pd_op.reshape: (144x144x16xf32) <- (20736x16xf32, 3xi64) + reshape_311 = paddle._C_ops.reshape(index_select_16, full_int_array_16) + + # pd_op.transpose: (16x144x144xf32) <- (144x144x16xf32) + transpose_100 = paddle._C_ops.transpose(reshape_311, [2, 0, 1]) + del reshape_311 + + # pd_op.unsqueeze: (1x16x144x144xf32) <- (16x144x144xf32, 1xi64) + unsqueeze_24 = paddle._C_ops.unsqueeze(transpose_100, full_int_array_0) + + # pd_op.add: (16x16x144x144xf32) <- (16x16x144x144xf32, 1x16x144x144xf32) + add_208 = paddle._C_ops.add(matmul_83, unsqueeze_24) + + # pd_op.softmax: (16x16x144x144xf32) <- (16x16x144x144xf32) + softmax_16 = paddle._C_ops.softmax(add_208, -1) + del add_208 + + # pd_op.matmul: (16x16x144x32xf32) <- (16x16x144x144xf32, 16x16x144x32xf32) + matmul_140 = paddle._C_ops.matmul(softmax_16, slice_16, False, False) + + # pd_op.transpose: (16x144x16x32xf32) <- (16x16x144x32xf32) + transpose_101 = paddle._C_ops.transpose(matmul_140, [0, 2, 1, 3]) + del matmul_140 + + # pd_op.reshape: (16x144x512xf32) <- (16x144x16x32xf32, 3xi64) + reshape_145 = paddle._C_ops.reshape(transpose_101, full_int_array_60) + + # pd_op.matmul: (16x144x512xf32) <- (16x144x512xf32, 512x512xf32) + matmul_84 = paddle._C_ops.matmul(reshape_145, parameter_98, False, False) + del parameter_98 + + # pd_op.add: (16x144x512xf32) <- (16x144x512xf32, 512xf32) + add_114 = paddle._C_ops.add(matmul_84, parameter_97) + del parameter_97 + + # pd_op.reshape: (16x12x12x512xf32) <- (16x144x512xf32, 4xi64) + reshape_146 = paddle._C_ops.reshape(add_114, full_int_array_57) + + # pd_op.reshape: (4x2x2x12x12x512xf32) <- (16x12x12x512xf32, 6xi64) + reshape_312 = paddle._C_ops.reshape(reshape_146, full_int_array_61) + + # pd_op.transpose: (4x2x12x2x12x512xf32) <- (4x2x2x12x12x512xf32) + transpose_102 = paddle._C_ops.transpose(reshape_312, [0, 1, 3, 2, 4, 5]) + del reshape_312 + + # pd_op.reshape: (4x24x24x512xf32) <- (4x2x12x2x12x512xf32, 4xi64) + reshape_147 = paddle._C_ops.reshape(transpose_102, full_int_array_62) + + # pd_op.reshape: (4x576x512xf32) <- (4x24x24x512xf32, 3xi64) + reshape_148 = paddle._C_ops.reshape(reshape_147, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_17 = paddle._C_ops.full( + [], + float("0.652174"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_182 = full_17 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_30 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_209 = paddle._C_ops.add(full_17, uniform_30) + del uniform_30 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_30 = paddle._C_ops.floor(add_209) + del add_209 + + # pd_op.divide: (4x576x512xf32) <- (4x576x512xf32, xf32) + divide_30 = paddle._C_ops.divide(reshape_148, full_17) + + # pd_op.multiply: (4x576x512xf32) <- (4x576x512xf32, 4x1x1xf32) + multiply_30 = paddle._C_ops.multiply(divide_30, floor_30) + + # pd_op.add: (4x576x512xf32) <- (4x576x512xf32, 4x576x512xf32) + add_115 = paddle._C_ops.add(add_112, multiply_30) + + # pd_op.layer_norm: (4x576x512xf32, 4x576xf32, 4x576xf32) <- (4x576x512xf32, 512xf32, 512xf32) + layer_norm_108, layer_norm_109, layer_norm_110 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_115, parameter_96, parameter_95, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_95, parameter_96 + + # pd_op.matmul: (4x576x2048xf32) <- (4x576x512xf32, 512x2048xf32) + matmul_85 = paddle._C_ops.matmul(layer_norm_108, parameter_94, False, False) + del parameter_94 + + # pd_op.add: (4x576x2048xf32) <- (4x576x2048xf32, 2048xf32) + add_116 = paddle._C_ops.add(matmul_85, parameter_93) + del parameter_93 + + # pd_op.gelu: (4x576x2048xf32) <- (4x576x2048xf32) + gelu_16 = paddle._C_ops.gelu(add_116, False) + + # pd_op.matmul: (4x576x512xf32) <- (4x576x2048xf32, 2048x512xf32) + matmul_86 = paddle._C_ops.matmul(gelu_16, parameter_92, False, False) + del parameter_92 + + # pd_op.add: (4x576x512xf32) <- (4x576x512xf32, 512xf32) + add_117 = paddle._C_ops.add(matmul_86, parameter_91) + del parameter_91 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_31 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_210 = paddle._C_ops.add(full_17, uniform_31) + del uniform_31 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_31 = paddle._C_ops.floor(add_210) + del add_210 + + # pd_op.divide: (4x576x512xf32) <- (4x576x512xf32, xf32) + divide_31 = paddle._C_ops.divide(add_117, full_17) + + # pd_op.multiply: (4x576x512xf32) <- (4x576x512xf32, 4x1x1xf32) + multiply_31 = paddle._C_ops.multiply(divide_31, floor_31) + + # pd_op.add: (4x576x512xf32) <- (4x576x512xf32, 4x576x512xf32) + add_118 = paddle._C_ops.add(add_115, multiply_31) + + # pd_op.layer_norm: (4x576x512xf32, 4x576xf32, 4x576xf32) <- (4x576x512xf32, 512xf32, 512xf32) + layer_norm_111, layer_norm_112, layer_norm_113 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_118, parameter_90, parameter_89, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_89, parameter_90 + + # pd_op.reshape: (4x24x24x512xf32) <- (4x576x512xf32, 4xi64) + reshape_149 = paddle._C_ops.reshape(layer_norm_111, full_int_array_55) + + # pd_op.roll: (4x24x24x512xf32) <- (4x24x24x512xf32, 2xi64) + roll_16 = paddle._C_ops.roll(reshape_149, full_int_array_4, [1, 2]) + + # pd_op.reshape: (4x2x12x2x12x512xf32) <- (4x24x24x512xf32, 6xi64) + reshape_313 = paddle._C_ops.reshape(roll_16, full_int_array_56) + + # pd_op.transpose: (4x2x2x12x12x512xf32) <- (4x2x12x2x12x512xf32) + transpose_103 = paddle._C_ops.transpose(reshape_313, [0, 1, 3, 2, 4, 5]) + del reshape_313 + + # pd_op.reshape: (16x12x12x512xf32) <- (4x2x2x12x12x512xf32, 4xi64) + reshape_150 = paddle._C_ops.reshape(transpose_103, full_int_array_57) + + # pd_op.reshape: (16x144x512xf32) <- (16x12x12x512xf32, 3xi64) + reshape_151 = paddle._C_ops.reshape(reshape_150, full_int_array_58) + + # pd_op.full: (1x24x24x1xf32) <- () + full_42 = paddle._C_ops.full( + [1, 24, 24, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__76 = paddle._C_ops.set_value_( + full_42, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_42 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__77 = paddle._C_ops.set_value_( + set_value__76, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__76 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__78 = paddle._C_ops.set_value_( + set_value__77, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__77 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__79 = paddle._C_ops.set_value_( + set_value__78, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__78 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__80 = paddle._C_ops.set_value_( + set_value__79, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__79 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__81 = paddle._C_ops.set_value_( + set_value__80, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__80 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__82 = paddle._C_ops.set_value_( + set_value__81, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__81 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__83 = paddle._C_ops.set_value_( + set_value__82, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__82 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__8 = paddle._C_ops.set_value_( + set_value__83, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__83 + + # pd_op.reshape: (1x2x12x2x12x1xf32) <- (1x24x24x1xf32, 6xi64) + reshape_314 = paddle._C_ops.reshape(set_value__8, full_int_array_64) + + # pd_op.transpose: (1x2x2x12x12x1xf32) <- (1x2x12x2x12x1xf32) + transpose_154 = paddle._C_ops.transpose(reshape_314, [0, 1, 3, 2, 4, 5]) + del reshape_314 + + # pd_op.reshape: (4x12x12x1xf32) <- (1x2x2x12x12x1xf32, 4xi64) + reshape_315 = paddle._C_ops.reshape(transpose_154, full_int_array_36) + del transpose_154 + + # pd_op.reshape: (4x144xf32) <- (4x12x12x1xf32, 2xi64) + reshape_316 = paddle._C_ops.reshape(reshape_315, full_int_array_37) + del reshape_315 + + # pd_op.unsqueeze: (4x1x144xf32) <- (4x144xf32, 1xi64) + unsqueeze_61 = paddle._C_ops.unsqueeze(reshape_316, full_int_array_1) + + # pd_op.unsqueeze: (4x144x1xf32) <- (4x144xf32, 1xi64) + unsqueeze_62 = paddle._C_ops.unsqueeze(reshape_316, full_int_array_2) + del reshape_316 + + # pd_op.subtract: (4x144x144xf32) <- (4x1x144xf32, 4x144x1xf32) + subtract_8 = paddle._C_ops.subtract(unsqueeze_61, unsqueeze_62) + del unsqueeze_61, unsqueeze_62 + + # pd_op.not_equal: (4x144x144xb) <- (4x144x144xf32, xf32) + not_equal_8 = paddle._C_ops.not_equal(subtract_8, full_26) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_16 = paddle._C_ops.where(not_equal_8, full_35, subtract_8) + del not_equal_8, subtract_8 + + # pd_op.equal: (4x144x144xb) <- (4x144x144xf32, xf32) + equal_8 = paddle._C_ops.equal(where_16, full_26) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_17 = paddle._C_ops.where(equal_8, full_36, where_16) + del equal_8, where_16 + + # pd_op.matmul: (16x144x1536xf32) <- (16x144x512xf32, 512x1536xf32) + matmul_87 = paddle._C_ops.matmul(reshape_151, parameter_88, False, False) + del parameter_88 + + # pd_op.add: (16x144x1536xf32) <- (16x144x1536xf32, 1536xf32) + add_119 = paddle._C_ops.add(matmul_87, parameter_87) + del parameter_87 + + # pd_op.reshape: (16x144x3x16x32xf32) <- (16x144x1536xf32, 5xi64) + reshape_317 = paddle._C_ops.reshape(add_119, full_int_array_59) + + # pd_op.transpose: (3x16x16x144x32xf32) <- (16x144x3x16x32xf32) + transpose_104 = paddle._C_ops.transpose(reshape_317, [2, 0, 3, 1, 4]) + del reshape_317 + + # pd_op.slice: (16x16x144x32xf32) <- (3x16x16x144x32xf32, 1xi64, 1xi64) + slice_58 = paddle._C_ops.slice( + transpose_104, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (16x16x144x32xf32) <- (3x16x16x144x32xf32, 1xi64, 1xi64) + slice_59 = paddle._C_ops.slice( + transpose_104, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (16x16x144x32xf32) <- (3x16x16x144x32xf32, 1xi64, 1xi64) + slice_17 = paddle._C_ops.slice( + transpose_104, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (16x16x144x32xf32) <- (16x16x144x32xf32, 1xf32) + scale_17 = paddle._C_ops.scale(slice_58, full_0, float("0"), True) + del slice_58 + + # pd_op.transpose: (16x16x32x144xf32) <- (16x16x144x32xf32) + transpose_105 = paddle._C_ops.transpose(slice_59, [0, 1, 3, 2]) + del slice_59 + + # pd_op.matmul: (16x16x144x144xf32) <- (16x16x144x32xf32, 16x16x32x144xf32) + matmul_88 = paddle._C_ops.matmul(scale_17, transpose_105, False, False) + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_152 = paddle._C_ops.reshape(data_7, full_int_array_15) + del data_7 + + # pd_op.index_select: (20736x16xf32) <- (529x16xf32, 20736xi64) + index_select_17 = paddle._C_ops.index_select(data_8, reshape_152, 0) + del data_8 + + # pd_op.reshape: (144x144x16xf32) <- (20736x16xf32, 3xi64) + reshape_318 = paddle._C_ops.reshape(index_select_17, full_int_array_16) + + # pd_op.transpose: (16x144x144xf32) <- (144x144x16xf32) + transpose_106 = paddle._C_ops.transpose(reshape_318, [2, 0, 1]) + del reshape_318 + + # pd_op.unsqueeze: (1x16x144x144xf32) <- (16x144x144xf32, 1xi64) + unsqueeze_25 = paddle._C_ops.unsqueeze(transpose_106, full_int_array_0) + + # pd_op.add: (16x16x144x144xf32) <- (16x16x144x144xf32, 1x16x144x144xf32) + add_120 = paddle._C_ops.add(matmul_88, unsqueeze_25) + + # pd_op.reshape: (4x4x16x144x144xf32) <- (16x16x144x144xf32, 5xi64) + reshape_153 = paddle._C_ops.reshape(add_120, full_int_array_65) + + # pd_op.unsqueeze: (4x1x144x144xf32) <- (4x144x144xf32, 1xi64) + unsqueeze_63 = paddle._C_ops.unsqueeze(where_17, full_int_array_1) + del where_17 + + # pd_op.unsqueeze: (1x4x1x144x144xf32) <- (4x1x144x144xf32, 1xi64) + unsqueeze_26 = paddle._C_ops.unsqueeze(unsqueeze_63, full_int_array_0) + del unsqueeze_63 + + # pd_op.add: (4x4x16x144x144xf32) <- (4x4x16x144x144xf32, 1x4x1x144x144xf32) + add_121 = paddle._C_ops.add(reshape_153, unsqueeze_26) + + # pd_op.reshape: (16x16x144x144xf32) <- (4x4x16x144x144xf32, 4xi64) + reshape_319 = paddle._C_ops.reshape(add_121, full_int_array_66) + + # pd_op.softmax: (16x16x144x144xf32) <- (16x16x144x144xf32) + softmax_17 = paddle._C_ops.softmax(reshape_319, -1) + del reshape_319 + + # pd_op.matmul: (16x16x144x32xf32) <- (16x16x144x144xf32, 16x16x144x32xf32) + matmul_141 = paddle._C_ops.matmul(softmax_17, slice_17, False, False) + + # pd_op.transpose: (16x144x16x32xf32) <- (16x16x144x32xf32) + transpose_107 = paddle._C_ops.transpose(matmul_141, [0, 2, 1, 3]) + del matmul_141 + + # pd_op.reshape: (16x144x512xf32) <- (16x144x16x32xf32, 3xi64) + reshape_154 = paddle._C_ops.reshape(transpose_107, full_int_array_60) + + # pd_op.matmul: (16x144x512xf32) <- (16x144x512xf32, 512x512xf32) + matmul_89 = paddle._C_ops.matmul(reshape_154, parameter_86, False, False) + del parameter_86 + + # pd_op.add: (16x144x512xf32) <- (16x144x512xf32, 512xf32) + add_122 = paddle._C_ops.add(matmul_89, parameter_85) + del parameter_85 + + # pd_op.reshape: (16x12x12x512xf32) <- (16x144x512xf32, 4xi64) + reshape_155 = paddle._C_ops.reshape(add_122, full_int_array_57) + + # pd_op.reshape: (4x2x2x12x12x512xf32) <- (16x12x12x512xf32, 6xi64) + reshape_320 = paddle._C_ops.reshape(reshape_155, full_int_array_61) + + # pd_op.transpose: (4x2x12x2x12x512xf32) <- (4x2x2x12x12x512xf32) + transpose_108 = paddle._C_ops.transpose(reshape_320, [0, 1, 3, 2, 4, 5]) + del reshape_320 + + # pd_op.reshape: (4x24x24x512xf32) <- (4x2x12x2x12x512xf32, 4xi64) + reshape_156 = paddle._C_ops.reshape(transpose_108, full_int_array_62) + + # pd_op.roll: (4x24x24x512xf32) <- (4x24x24x512xf32, 2xi64) + roll_17 = paddle._C_ops.roll(reshape_156, full_int_array_5, [1, 2]) + + # pd_op.reshape: (4x576x512xf32) <- (4x24x24x512xf32, 3xi64) + reshape_157 = paddle._C_ops.reshape(roll_17, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_18 = paddle._C_ops.full( + [], + float("0.630435"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_193 = full_18 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_32 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_211 = paddle._C_ops.add(full_18, uniform_32) + del uniform_32 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_32 = paddle._C_ops.floor(add_211) + del add_211 + + # pd_op.divide: (4x576x512xf32) <- (4x576x512xf32, xf32) + divide_32 = paddle._C_ops.divide(reshape_157, full_18) + + # pd_op.multiply: (4x576x512xf32) <- (4x576x512xf32, 4x1x1xf32) + multiply_32 = paddle._C_ops.multiply(divide_32, floor_32) + + # pd_op.add: (4x576x512xf32) <- (4x576x512xf32, 4x576x512xf32) + add_123 = paddle._C_ops.add(add_118, multiply_32) + + # pd_op.layer_norm: (4x576x512xf32, 4x576xf32, 4x576xf32) <- (4x576x512xf32, 512xf32, 512xf32) + layer_norm_114, layer_norm_115, layer_norm_116 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_123, parameter_84, parameter_83, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_83, parameter_84 + + # pd_op.matmul: (4x576x2048xf32) <- (4x576x512xf32, 512x2048xf32) + matmul_90 = paddle._C_ops.matmul(layer_norm_114, parameter_82, False, False) + del parameter_82 + + # pd_op.add: (4x576x2048xf32) <- (4x576x2048xf32, 2048xf32) + add_124 = paddle._C_ops.add(matmul_90, parameter_81) + del parameter_81 + + # pd_op.gelu: (4x576x2048xf32) <- (4x576x2048xf32) + gelu_17 = paddle._C_ops.gelu(add_124, False) + + # pd_op.matmul: (4x576x512xf32) <- (4x576x2048xf32, 2048x512xf32) + matmul_91 = paddle._C_ops.matmul(gelu_17, parameter_80, False, False) + del parameter_80 + + # pd_op.add: (4x576x512xf32) <- (4x576x512xf32, 512xf32) + add_125 = paddle._C_ops.add(matmul_91, parameter_79) + del parameter_79 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_33 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_212 = paddle._C_ops.add(full_18, uniform_33) + del uniform_33 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_33 = paddle._C_ops.floor(add_212) + del add_212 + + # pd_op.divide: (4x576x512xf32) <- (4x576x512xf32, xf32) + divide_33 = paddle._C_ops.divide(add_125, full_18) + + # pd_op.multiply: (4x576x512xf32) <- (4x576x512xf32, 4x1x1xf32) + multiply_33 = paddle._C_ops.multiply(divide_33, floor_33) + + # pd_op.add: (4x576x512xf32) <- (4x576x512xf32, 4x576x512xf32) + add_126 = paddle._C_ops.add(add_123, multiply_33) + + # pd_op.layer_norm: (4x576x512xf32, 4x576xf32, 4x576xf32) <- (4x576x512xf32, 512xf32, 512xf32) + layer_norm_117, layer_norm_118, layer_norm_119 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_126, parameter_78, parameter_77, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_77, parameter_78 + + # pd_op.reshape: (4x24x24x512xf32) <- (4x576x512xf32, 4xi64) + reshape_158 = paddle._C_ops.reshape(layer_norm_117, full_int_array_55) + + # pd_op.reshape: (4x2x12x2x12x512xf32) <- (4x24x24x512xf32, 6xi64) + reshape_321 = paddle._C_ops.reshape(reshape_158, full_int_array_56) + + # pd_op.transpose: (4x2x2x12x12x512xf32) <- (4x2x12x2x12x512xf32) + transpose_109 = paddle._C_ops.transpose(reshape_321, [0, 1, 3, 2, 4, 5]) + del reshape_321 + + # pd_op.reshape: (16x12x12x512xf32) <- (4x2x2x12x12x512xf32, 4xi64) + reshape_159 = paddle._C_ops.reshape(transpose_109, full_int_array_57) + + # pd_op.reshape: (16x144x512xf32) <- (16x12x12x512xf32, 3xi64) + reshape_160 = paddle._C_ops.reshape(reshape_159, full_int_array_58) + + # pd_op.matmul: (16x144x1536xf32) <- (16x144x512xf32, 512x1536xf32) + matmul_92 = paddle._C_ops.matmul(reshape_160, parameter_76, False, False) + del parameter_76 + + # pd_op.add: (16x144x1536xf32) <- (16x144x1536xf32, 1536xf32) + add_127 = paddle._C_ops.add(matmul_92, parameter_75) + del parameter_75 + + # pd_op.reshape: (16x144x3x16x32xf32) <- (16x144x1536xf32, 5xi64) + reshape_322 = paddle._C_ops.reshape(add_127, full_int_array_59) + + # pd_op.transpose: (3x16x16x144x32xf32) <- (16x144x3x16x32xf32) + transpose_110 = paddle._C_ops.transpose(reshape_322, [2, 0, 3, 1, 4]) + del reshape_322 + + # pd_op.slice: (16x16x144x32xf32) <- (3x16x16x144x32xf32, 1xi64, 1xi64) + slice_60 = paddle._C_ops.slice( + transpose_110, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (16x16x144x32xf32) <- (3x16x16x144x32xf32, 1xi64, 1xi64) + slice_61 = paddle._C_ops.slice( + transpose_110, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (16x16x144x32xf32) <- (3x16x16x144x32xf32, 1xi64, 1xi64) + slice_18 = paddle._C_ops.slice( + transpose_110, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (16x16x144x32xf32) <- (16x16x144x32xf32, 1xf32) + scale_18 = paddle._C_ops.scale(slice_60, full_0, float("0"), True) + del slice_60 + + # pd_op.transpose: (16x16x32x144xf32) <- (16x16x144x32xf32) + transpose_111 = paddle._C_ops.transpose(slice_61, [0, 1, 3, 2]) + del slice_61 + + # pd_op.matmul: (16x16x144x144xf32) <- (16x16x144x32xf32, 16x16x32x144xf32) + matmul_93 = paddle._C_ops.matmul(scale_18, transpose_111, False, False) + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_161 = paddle._C_ops.reshape(data_9, full_int_array_15) + del data_9 + + # pd_op.index_select: (20736x16xf32) <- (529x16xf32, 20736xi64) + index_select_18 = paddle._C_ops.index_select(data_10, reshape_161, 0) + del data_10 + + # pd_op.reshape: (144x144x16xf32) <- (20736x16xf32, 3xi64) + reshape_323 = paddle._C_ops.reshape(index_select_18, full_int_array_16) + + # pd_op.transpose: (16x144x144xf32) <- (144x144x16xf32) + transpose_112 = paddle._C_ops.transpose(reshape_323, [2, 0, 1]) + del reshape_323 + + # pd_op.unsqueeze: (1x16x144x144xf32) <- (16x144x144xf32, 1xi64) + unsqueeze_27 = paddle._C_ops.unsqueeze(transpose_112, full_int_array_0) + + # pd_op.add: (16x16x144x144xf32) <- (16x16x144x144xf32, 1x16x144x144xf32) + add_213 = paddle._C_ops.add(matmul_93, unsqueeze_27) + + # pd_op.softmax: (16x16x144x144xf32) <- (16x16x144x144xf32) + softmax_18 = paddle._C_ops.softmax(add_213, -1) + del add_213 + + # pd_op.matmul: (16x16x144x32xf32) <- (16x16x144x144xf32, 16x16x144x32xf32) + matmul_142 = paddle._C_ops.matmul(softmax_18, slice_18, False, False) + + # pd_op.transpose: (16x144x16x32xf32) <- (16x16x144x32xf32) + transpose_113 = paddle._C_ops.transpose(matmul_142, [0, 2, 1, 3]) + del matmul_142 + + # pd_op.reshape: (16x144x512xf32) <- (16x144x16x32xf32, 3xi64) + reshape_162 = paddle._C_ops.reshape(transpose_113, full_int_array_60) + + # pd_op.matmul: (16x144x512xf32) <- (16x144x512xf32, 512x512xf32) + matmul_94 = paddle._C_ops.matmul(reshape_162, parameter_74, False, False) + del parameter_74 + + # pd_op.add: (16x144x512xf32) <- (16x144x512xf32, 512xf32) + add_128 = paddle._C_ops.add(matmul_94, parameter_73) + del parameter_73 + + # pd_op.reshape: (16x12x12x512xf32) <- (16x144x512xf32, 4xi64) + reshape_163 = paddle._C_ops.reshape(add_128, full_int_array_57) + + # pd_op.reshape: (4x2x2x12x12x512xf32) <- (16x12x12x512xf32, 6xi64) + reshape_324 = paddle._C_ops.reshape(reshape_163, full_int_array_61) + + # pd_op.transpose: (4x2x12x2x12x512xf32) <- (4x2x2x12x12x512xf32) + transpose_114 = paddle._C_ops.transpose(reshape_324, [0, 1, 3, 2, 4, 5]) + del reshape_324 + + # pd_op.reshape: (4x24x24x512xf32) <- (4x2x12x2x12x512xf32, 4xi64) + reshape_164 = paddle._C_ops.reshape(transpose_114, full_int_array_62) + + # pd_op.reshape: (4x576x512xf32) <- (4x24x24x512xf32, 3xi64) + reshape_165 = paddle._C_ops.reshape(reshape_164, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_19 = paddle._C_ops.full( + [], + float("0.608696"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_202 = full_19 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_34 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_214 = paddle._C_ops.add(full_19, uniform_34) + del uniform_34 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_34 = paddle._C_ops.floor(add_214) + del add_214 + + # pd_op.divide: (4x576x512xf32) <- (4x576x512xf32, xf32) + divide_34 = paddle._C_ops.divide(reshape_165, full_19) + + # pd_op.multiply: (4x576x512xf32) <- (4x576x512xf32, 4x1x1xf32) + multiply_34 = paddle._C_ops.multiply(divide_34, floor_34) + + # pd_op.add: (4x576x512xf32) <- (4x576x512xf32, 4x576x512xf32) + add_129 = paddle._C_ops.add(add_126, multiply_34) + + # pd_op.layer_norm: (4x576x512xf32, 4x576xf32, 4x576xf32) <- (4x576x512xf32, 512xf32, 512xf32) + layer_norm_120, layer_norm_121, layer_norm_122 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_129, parameter_72, parameter_71, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_71, parameter_72 + + # pd_op.matmul: (4x576x2048xf32) <- (4x576x512xf32, 512x2048xf32) + matmul_95 = paddle._C_ops.matmul(layer_norm_120, parameter_70, False, False) + del parameter_70 + + # pd_op.add: (4x576x2048xf32) <- (4x576x2048xf32, 2048xf32) + add_130 = paddle._C_ops.add(matmul_95, parameter_69) + del parameter_69 + + # pd_op.gelu: (4x576x2048xf32) <- (4x576x2048xf32) + gelu_18 = paddle._C_ops.gelu(add_130, False) + + # pd_op.matmul: (4x576x512xf32) <- (4x576x2048xf32, 2048x512xf32) + matmul_96 = paddle._C_ops.matmul(gelu_18, parameter_68, False, False) + del parameter_68 + + # pd_op.add: (4x576x512xf32) <- (4x576x512xf32, 512xf32) + add_131 = paddle._C_ops.add(matmul_96, parameter_67) + del parameter_67 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_35 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_215 = paddle._C_ops.add(full_19, uniform_35) + del uniform_35 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_35 = paddle._C_ops.floor(add_215) + del add_215 + + # pd_op.divide: (4x576x512xf32) <- (4x576x512xf32, xf32) + divide_35 = paddle._C_ops.divide(add_131, full_19) + + # pd_op.multiply: (4x576x512xf32) <- (4x576x512xf32, 4x1x1xf32) + multiply_35 = paddle._C_ops.multiply(divide_35, floor_35) + + # pd_op.add: (4x576x512xf32) <- (4x576x512xf32, 4x576x512xf32) + add_132 = paddle._C_ops.add(add_129, multiply_35) + + # pd_op.layer_norm: (4x576x512xf32, 4x576xf32, 4x576xf32) <- (4x576x512xf32, 512xf32, 512xf32) + layer_norm_123, layer_norm_124, layer_norm_125 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_132, parameter_66, parameter_65, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_65, parameter_66 + + # pd_op.reshape: (4x24x24x512xf32) <- (4x576x512xf32, 4xi64) + reshape_166 = paddle._C_ops.reshape(layer_norm_123, full_int_array_55) + + # pd_op.roll: (4x24x24x512xf32) <- (4x24x24x512xf32, 2xi64) + roll_18 = paddle._C_ops.roll(reshape_166, full_int_array_4, [1, 2]) + + # pd_op.reshape: (4x2x12x2x12x512xf32) <- (4x24x24x512xf32, 6xi64) + reshape_325 = paddle._C_ops.reshape(roll_18, full_int_array_56) + + # pd_op.transpose: (4x2x2x12x12x512xf32) <- (4x2x12x2x12x512xf32) + transpose_115 = paddle._C_ops.transpose(reshape_325, [0, 1, 3, 2, 4, 5]) + del reshape_325 + + # pd_op.reshape: (16x12x12x512xf32) <- (4x2x2x12x12x512xf32, 4xi64) + reshape_167 = paddle._C_ops.reshape(transpose_115, full_int_array_57) + + # pd_op.reshape: (16x144x512xf32) <- (16x12x12x512xf32, 3xi64) + reshape_168 = paddle._C_ops.reshape(reshape_167, full_int_array_58) + + # pd_op.full: (1x24x24x1xf32) <- () + full_43 = paddle._C_ops.full( + [1, 24, 24, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__84 = paddle._C_ops.set_value_( + full_43, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_43 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__85 = paddle._C_ops.set_value_( + set_value__84, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__84 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__86 = paddle._C_ops.set_value_( + set_value__85, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__85 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__87 = paddle._C_ops.set_value_( + set_value__86, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__86 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__88 = paddle._C_ops.set_value_( + set_value__87, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__87 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__89 = paddle._C_ops.set_value_( + set_value__88, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__88 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__90 = paddle._C_ops.set_value_( + set_value__89, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__89 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__91 = paddle._C_ops.set_value_( + set_value__90, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__90 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__9 = paddle._C_ops.set_value_( + set_value__91, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__91 + + # pd_op.reshape: (1x2x12x2x12x1xf32) <- (1x24x24x1xf32, 6xi64) + reshape_326 = paddle._C_ops.reshape(set_value__9, full_int_array_64) + + # pd_op.transpose: (1x2x2x12x12x1xf32) <- (1x2x12x2x12x1xf32) + transpose_155 = paddle._C_ops.transpose(reshape_326, [0, 1, 3, 2, 4, 5]) + del reshape_326 + + # pd_op.reshape: (4x12x12x1xf32) <- (1x2x2x12x12x1xf32, 4xi64) + reshape_327 = paddle._C_ops.reshape(transpose_155, full_int_array_36) + del transpose_155 + + # pd_op.reshape: (4x144xf32) <- (4x12x12x1xf32, 2xi64) + reshape_328 = paddle._C_ops.reshape(reshape_327, full_int_array_37) + del reshape_327 + + # pd_op.unsqueeze: (4x1x144xf32) <- (4x144xf32, 1xi64) + unsqueeze_64 = paddle._C_ops.unsqueeze(reshape_328, full_int_array_1) + + # pd_op.unsqueeze: (4x144x1xf32) <- (4x144xf32, 1xi64) + unsqueeze_65 = paddle._C_ops.unsqueeze(reshape_328, full_int_array_2) + del reshape_328 + + # pd_op.subtract: (4x144x144xf32) <- (4x1x144xf32, 4x144x1xf32) + subtract_9 = paddle._C_ops.subtract(unsqueeze_64, unsqueeze_65) + del unsqueeze_64, unsqueeze_65 + + # pd_op.not_equal: (4x144x144xb) <- (4x144x144xf32, xf32) + not_equal_9 = paddle._C_ops.not_equal(subtract_9, full_26) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_18 = paddle._C_ops.where(not_equal_9, full_35, subtract_9) + del not_equal_9, subtract_9 + + # pd_op.equal: (4x144x144xb) <- (4x144x144xf32, xf32) + equal_9 = paddle._C_ops.equal(where_18, full_26) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_19 = paddle._C_ops.where(equal_9, full_36, where_18) + del equal_9, where_18 + + # pd_op.matmul: (16x144x1536xf32) <- (16x144x512xf32, 512x1536xf32) + matmul_97 = paddle._C_ops.matmul(reshape_168, parameter_64, False, False) + del parameter_64 + + # pd_op.add: (16x144x1536xf32) <- (16x144x1536xf32, 1536xf32) + add_133 = paddle._C_ops.add(matmul_97, parameter_63) + del parameter_63 + + # pd_op.reshape: (16x144x3x16x32xf32) <- (16x144x1536xf32, 5xi64) + reshape_329 = paddle._C_ops.reshape(add_133, full_int_array_59) + + # pd_op.transpose: (3x16x16x144x32xf32) <- (16x144x3x16x32xf32) + transpose_116 = paddle._C_ops.transpose(reshape_329, [2, 0, 3, 1, 4]) + del reshape_329 + + # pd_op.slice: (16x16x144x32xf32) <- (3x16x16x144x32xf32, 1xi64, 1xi64) + slice_62 = paddle._C_ops.slice( + transpose_116, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (16x16x144x32xf32) <- (3x16x16x144x32xf32, 1xi64, 1xi64) + slice_63 = paddle._C_ops.slice( + transpose_116, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (16x16x144x32xf32) <- (3x16x16x144x32xf32, 1xi64, 1xi64) + slice_19 = paddle._C_ops.slice( + transpose_116, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (16x16x144x32xf32) <- (16x16x144x32xf32, 1xf32) + scale_19 = paddle._C_ops.scale(slice_62, full_0, float("0"), True) + del slice_62 + + # pd_op.transpose: (16x16x32x144xf32) <- (16x16x144x32xf32) + transpose_117 = paddle._C_ops.transpose(slice_63, [0, 1, 3, 2]) + del slice_63 + + # pd_op.matmul: (16x16x144x144xf32) <- (16x16x144x32xf32, 16x16x32x144xf32) + matmul_98 = paddle._C_ops.matmul(scale_19, transpose_117, False, False) + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_169 = paddle._C_ops.reshape(data_11, full_int_array_15) + del data_11 + + # pd_op.index_select: (20736x16xf32) <- (529x16xf32, 20736xi64) + index_select_19 = paddle._C_ops.index_select(data_12, reshape_169, 0) + del data_12 + + # pd_op.reshape: (144x144x16xf32) <- (20736x16xf32, 3xi64) + reshape_330 = paddle._C_ops.reshape(index_select_19, full_int_array_16) + + # pd_op.transpose: (16x144x144xf32) <- (144x144x16xf32) + transpose_118 = paddle._C_ops.transpose(reshape_330, [2, 0, 1]) + del reshape_330 + + # pd_op.unsqueeze: (1x16x144x144xf32) <- (16x144x144xf32, 1xi64) + unsqueeze_28 = paddle._C_ops.unsqueeze(transpose_118, full_int_array_0) + + # pd_op.add: (16x16x144x144xf32) <- (16x16x144x144xf32, 1x16x144x144xf32) + add_134 = paddle._C_ops.add(matmul_98, unsqueeze_28) + + # pd_op.reshape: (4x4x16x144x144xf32) <- (16x16x144x144xf32, 5xi64) + reshape_170 = paddle._C_ops.reshape(add_134, full_int_array_65) + + # pd_op.unsqueeze: (4x1x144x144xf32) <- (4x144x144xf32, 1xi64) + unsqueeze_66 = paddle._C_ops.unsqueeze(where_19, full_int_array_1) + del where_19 + + # pd_op.unsqueeze: (1x4x1x144x144xf32) <- (4x1x144x144xf32, 1xi64) + unsqueeze_29 = paddle._C_ops.unsqueeze(unsqueeze_66, full_int_array_0) + del unsqueeze_66 + + # pd_op.add: (4x4x16x144x144xf32) <- (4x4x16x144x144xf32, 1x4x1x144x144xf32) + add_135 = paddle._C_ops.add(reshape_170, unsqueeze_29) + + # pd_op.reshape: (16x16x144x144xf32) <- (4x4x16x144x144xf32, 4xi64) + reshape_331 = paddle._C_ops.reshape(add_135, full_int_array_66) + + # pd_op.softmax: (16x16x144x144xf32) <- (16x16x144x144xf32) + softmax_19 = paddle._C_ops.softmax(reshape_331, -1) + del reshape_331 + + # pd_op.matmul: (16x16x144x32xf32) <- (16x16x144x144xf32, 16x16x144x32xf32) + matmul_143 = paddle._C_ops.matmul(softmax_19, slice_19, False, False) + + # pd_op.transpose: (16x144x16x32xf32) <- (16x16x144x32xf32) + transpose_119 = paddle._C_ops.transpose(matmul_143, [0, 2, 1, 3]) + del matmul_143 + + # pd_op.reshape: (16x144x512xf32) <- (16x144x16x32xf32, 3xi64) + reshape_171 = paddle._C_ops.reshape(transpose_119, full_int_array_60) + + # pd_op.matmul: (16x144x512xf32) <- (16x144x512xf32, 512x512xf32) + matmul_99 = paddle._C_ops.matmul(reshape_171, parameter_62, False, False) + del parameter_62 + + # pd_op.add: (16x144x512xf32) <- (16x144x512xf32, 512xf32) + add_136 = paddle._C_ops.add(matmul_99, parameter_61) + del parameter_61 + + # pd_op.reshape: (16x12x12x512xf32) <- (16x144x512xf32, 4xi64) + reshape_172 = paddle._C_ops.reshape(add_136, full_int_array_57) + + # pd_op.reshape: (4x2x2x12x12x512xf32) <- (16x12x12x512xf32, 6xi64) + reshape_332 = paddle._C_ops.reshape(reshape_172, full_int_array_61) + + # pd_op.transpose: (4x2x12x2x12x512xf32) <- (4x2x2x12x12x512xf32) + transpose_120 = paddle._C_ops.transpose(reshape_332, [0, 1, 3, 2, 4, 5]) + del reshape_332 + + # pd_op.reshape: (4x24x24x512xf32) <- (4x2x12x2x12x512xf32, 4xi64) + reshape_173 = paddle._C_ops.reshape(transpose_120, full_int_array_62) + + # pd_op.roll: (4x24x24x512xf32) <- (4x24x24x512xf32, 2xi64) + roll_19 = paddle._C_ops.roll(reshape_173, full_int_array_5, [1, 2]) + + # pd_op.reshape: (4x576x512xf32) <- (4x24x24x512xf32, 3xi64) + reshape_174 = paddle._C_ops.reshape(roll_19, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_20 = paddle._C_ops.full( + [], + float("0.586957"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_213 = full_20 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_36 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_216 = paddle._C_ops.add(full_20, uniform_36) + del uniform_36 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_36 = paddle._C_ops.floor(add_216) + del add_216 + + # pd_op.divide: (4x576x512xf32) <- (4x576x512xf32, xf32) + divide_36 = paddle._C_ops.divide(reshape_174, full_20) + + # pd_op.multiply: (4x576x512xf32) <- (4x576x512xf32, 4x1x1xf32) + multiply_36 = paddle._C_ops.multiply(divide_36, floor_36) + + # pd_op.add: (4x576x512xf32) <- (4x576x512xf32, 4x576x512xf32) + add_137 = paddle._C_ops.add(add_132, multiply_36) + + # pd_op.layer_norm: (4x576x512xf32, 4x576xf32, 4x576xf32) <- (4x576x512xf32, 512xf32, 512xf32) + layer_norm_126, layer_norm_127, layer_norm_128 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_137, parameter_60, parameter_59, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_59, parameter_60 + + # pd_op.matmul: (4x576x2048xf32) <- (4x576x512xf32, 512x2048xf32) + matmul_100 = paddle._C_ops.matmul(layer_norm_126, parameter_58, False, False) + del parameter_58 + + # pd_op.add: (4x576x2048xf32) <- (4x576x2048xf32, 2048xf32) + add_138 = paddle._C_ops.add(matmul_100, parameter_57) + del parameter_57 + + # pd_op.gelu: (4x576x2048xf32) <- (4x576x2048xf32) + gelu_19 = paddle._C_ops.gelu(add_138, False) + + # pd_op.matmul: (4x576x512xf32) <- (4x576x2048xf32, 2048x512xf32) + matmul_101 = paddle._C_ops.matmul(gelu_19, parameter_56, False, False) + del parameter_56 + + # pd_op.add: (4x576x512xf32) <- (4x576x512xf32, 512xf32) + add_139 = paddle._C_ops.add(matmul_101, parameter_55) + del parameter_55 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_37 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_217 = paddle._C_ops.add(full_20, uniform_37) + del uniform_37 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_37 = paddle._C_ops.floor(add_217) + del add_217 + + # pd_op.divide: (4x576x512xf32) <- (4x576x512xf32, xf32) + divide_37 = paddle._C_ops.divide(add_139, full_20) + + # pd_op.multiply: (4x576x512xf32) <- (4x576x512xf32, 4x1x1xf32) + multiply_37 = paddle._C_ops.multiply(divide_37, floor_37) + + # pd_op.add: (4x576x512xf32) <- (4x576x512xf32, 4x576x512xf32) + add_140 = paddle._C_ops.add(add_137, multiply_37) + + # pd_op.layer_norm: (4x576x512xf32, 4x576xf32, 4x576xf32) <- (4x576x512xf32, 512xf32, 512xf32) + layer_norm_129, layer_norm_130, layer_norm_131 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_140, parameter_54, parameter_53, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_53, parameter_54 + + # pd_op.reshape: (4x24x24x512xf32) <- (4x576x512xf32, 4xi64) + reshape_175 = paddle._C_ops.reshape(layer_norm_129, full_int_array_55) + + # pd_op.reshape: (4x2x12x2x12x512xf32) <- (4x24x24x512xf32, 6xi64) + reshape_333 = paddle._C_ops.reshape(reshape_175, full_int_array_56) + + # pd_op.transpose: (4x2x2x12x12x512xf32) <- (4x2x12x2x12x512xf32) + transpose_121 = paddle._C_ops.transpose(reshape_333, [0, 1, 3, 2, 4, 5]) + del reshape_333 + + # pd_op.reshape: (16x12x12x512xf32) <- (4x2x2x12x12x512xf32, 4xi64) + reshape_176 = paddle._C_ops.reshape(transpose_121, full_int_array_57) + + # pd_op.reshape: (16x144x512xf32) <- (16x12x12x512xf32, 3xi64) + reshape_177 = paddle._C_ops.reshape(reshape_176, full_int_array_58) + + # pd_op.matmul: (16x144x1536xf32) <- (16x144x512xf32, 512x1536xf32) + matmul_102 = paddle._C_ops.matmul(reshape_177, parameter_52, False, False) + del parameter_52 + + # pd_op.add: (16x144x1536xf32) <- (16x144x1536xf32, 1536xf32) + add_141 = paddle._C_ops.add(matmul_102, parameter_51) + del parameter_51 + + # pd_op.reshape: (16x144x3x16x32xf32) <- (16x144x1536xf32, 5xi64) + reshape_334 = paddle._C_ops.reshape(add_141, full_int_array_59) + + # pd_op.transpose: (3x16x16x144x32xf32) <- (16x144x3x16x32xf32) + transpose_122 = paddle._C_ops.transpose(reshape_334, [2, 0, 3, 1, 4]) + del reshape_334 + + # pd_op.slice: (16x16x144x32xf32) <- (3x16x16x144x32xf32, 1xi64, 1xi64) + slice_64 = paddle._C_ops.slice( + transpose_122, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (16x16x144x32xf32) <- (3x16x16x144x32xf32, 1xi64, 1xi64) + slice_65 = paddle._C_ops.slice( + transpose_122, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (16x16x144x32xf32) <- (3x16x16x144x32xf32, 1xi64, 1xi64) + slice_20 = paddle._C_ops.slice( + transpose_122, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (16x16x144x32xf32) <- (16x16x144x32xf32, 1xf32) + scale_20 = paddle._C_ops.scale(slice_64, full_0, float("0"), True) + del slice_64 + + # pd_op.transpose: (16x16x32x144xf32) <- (16x16x144x32xf32) + transpose_123 = paddle._C_ops.transpose(slice_65, [0, 1, 3, 2]) + del slice_65 + + # pd_op.matmul: (16x16x144x144xf32) <- (16x16x144x32xf32, 16x16x32x144xf32) + matmul_103 = paddle._C_ops.matmul(scale_20, transpose_123, False, False) + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_178 = paddle._C_ops.reshape(data_13, full_int_array_15) + del data_13 + + # pd_op.index_select: (20736x16xf32) <- (529x16xf32, 20736xi64) + index_select_20 = paddle._C_ops.index_select(data_14, reshape_178, 0) + del data_14 + + # pd_op.reshape: (144x144x16xf32) <- (20736x16xf32, 3xi64) + reshape_335 = paddle._C_ops.reshape(index_select_20, full_int_array_16) + + # pd_op.transpose: (16x144x144xf32) <- (144x144x16xf32) + transpose_124 = paddle._C_ops.transpose(reshape_335, [2, 0, 1]) + del reshape_335 + + # pd_op.unsqueeze: (1x16x144x144xf32) <- (16x144x144xf32, 1xi64) + unsqueeze_30 = paddle._C_ops.unsqueeze(transpose_124, full_int_array_0) + + # pd_op.add: (16x16x144x144xf32) <- (16x16x144x144xf32, 1x16x144x144xf32) + add_218 = paddle._C_ops.add(matmul_103, unsqueeze_30) + + # pd_op.softmax: (16x16x144x144xf32) <- (16x16x144x144xf32) + softmax_20 = paddle._C_ops.softmax(add_218, -1) + del add_218 + + # pd_op.matmul: (16x16x144x32xf32) <- (16x16x144x144xf32, 16x16x144x32xf32) + matmul_144 = paddle._C_ops.matmul(softmax_20, slice_20, False, False) + + # pd_op.transpose: (16x144x16x32xf32) <- (16x16x144x32xf32) + transpose_125 = paddle._C_ops.transpose(matmul_144, [0, 2, 1, 3]) + del matmul_144 + + # pd_op.reshape: (16x144x512xf32) <- (16x144x16x32xf32, 3xi64) + reshape_179 = paddle._C_ops.reshape(transpose_125, full_int_array_60) + + # pd_op.matmul: (16x144x512xf32) <- (16x144x512xf32, 512x512xf32) + matmul_104 = paddle._C_ops.matmul(reshape_179, parameter_50, False, False) + del parameter_50 + + # pd_op.add: (16x144x512xf32) <- (16x144x512xf32, 512xf32) + add_142 = paddle._C_ops.add(matmul_104, parameter_49) + del parameter_49 + + # pd_op.reshape: (16x12x12x512xf32) <- (16x144x512xf32, 4xi64) + reshape_180 = paddle._C_ops.reshape(add_142, full_int_array_57) + + # pd_op.reshape: (4x2x2x12x12x512xf32) <- (16x12x12x512xf32, 6xi64) + reshape_336 = paddle._C_ops.reshape(reshape_180, full_int_array_61) + + # pd_op.transpose: (4x2x12x2x12x512xf32) <- (4x2x2x12x12x512xf32) + transpose_126 = paddle._C_ops.transpose(reshape_336, [0, 1, 3, 2, 4, 5]) + del reshape_336 + + # pd_op.reshape: (4x24x24x512xf32) <- (4x2x12x2x12x512xf32, 4xi64) + reshape_181 = paddle._C_ops.reshape(transpose_126, full_int_array_62) + + # pd_op.reshape: (4x576x512xf32) <- (4x24x24x512xf32, 3xi64) + reshape_182 = paddle._C_ops.reshape(reshape_181, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_21 = paddle._C_ops.full( + [], + float("0.565217"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_222 = full_21 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_38 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_219 = paddle._C_ops.add(full_21, uniform_38) + del uniform_38 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_38 = paddle._C_ops.floor(add_219) + del add_219 + + # pd_op.divide: (4x576x512xf32) <- (4x576x512xf32, xf32) + divide_38 = paddle._C_ops.divide(reshape_182, full_21) + + # pd_op.multiply: (4x576x512xf32) <- (4x576x512xf32, 4x1x1xf32) + multiply_38 = paddle._C_ops.multiply(divide_38, floor_38) + + # pd_op.add: (4x576x512xf32) <- (4x576x512xf32, 4x576x512xf32) + add_143 = paddle._C_ops.add(add_140, multiply_38) + + # pd_op.layer_norm: (4x576x512xf32, 4x576xf32, 4x576xf32) <- (4x576x512xf32, 512xf32, 512xf32) + layer_norm_132, layer_norm_133, layer_norm_134 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_143, parameter_48, parameter_47, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_47, parameter_48 + + # pd_op.matmul: (4x576x2048xf32) <- (4x576x512xf32, 512x2048xf32) + matmul_105 = paddle._C_ops.matmul(layer_norm_132, parameter_46, False, False) + del parameter_46 + + # pd_op.add: (4x576x2048xf32) <- (4x576x2048xf32, 2048xf32) + add_144 = paddle._C_ops.add(matmul_105, parameter_45) + del parameter_45 + + # pd_op.gelu: (4x576x2048xf32) <- (4x576x2048xf32) + gelu_20 = paddle._C_ops.gelu(add_144, False) + + # pd_op.matmul: (4x576x512xf32) <- (4x576x2048xf32, 2048x512xf32) + matmul_106 = paddle._C_ops.matmul(gelu_20, parameter_44, False, False) + del parameter_44 + + # pd_op.add: (4x576x512xf32) <- (4x576x512xf32, 512xf32) + add_145 = paddle._C_ops.add(matmul_106, parameter_43) + del parameter_43 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_39 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_220 = paddle._C_ops.add(full_21, uniform_39) + del uniform_39 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_39 = paddle._C_ops.floor(add_220) + del add_220 + + # pd_op.divide: (4x576x512xf32) <- (4x576x512xf32, xf32) + divide_39 = paddle._C_ops.divide(add_145, full_21) + + # pd_op.multiply: (4x576x512xf32) <- (4x576x512xf32, 4x1x1xf32) + multiply_39 = paddle._C_ops.multiply(divide_39, floor_39) + + # pd_op.add: (4x576x512xf32) <- (4x576x512xf32, 4x576x512xf32) + add_146 = paddle._C_ops.add(add_143, multiply_39) + + # pd_op.layer_norm: (4x576x512xf32, 4x576xf32, 4x576xf32) <- (4x576x512xf32, 512xf32, 512xf32) + layer_norm_135, layer_norm_136, layer_norm_137 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_146, parameter_42, parameter_41, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_41, parameter_42 + + # pd_op.reshape: (4x24x24x512xf32) <- (4x576x512xf32, 4xi64) + reshape_183 = paddle._C_ops.reshape(layer_norm_135, full_int_array_55) + + # pd_op.roll: (4x24x24x512xf32) <- (4x24x24x512xf32, 2xi64) + roll_20 = paddle._C_ops.roll(reshape_183, full_int_array_4, [1, 2]) + + # pd_op.reshape: (4x2x12x2x12x512xf32) <- (4x24x24x512xf32, 6xi64) + reshape_337 = paddle._C_ops.reshape(roll_20, full_int_array_56) + del full_int_array_56 + + # pd_op.transpose: (4x2x2x12x12x512xf32) <- (4x2x12x2x12x512xf32) + transpose_127 = paddle._C_ops.transpose(reshape_337, [0, 1, 3, 2, 4, 5]) + del reshape_337 + + # pd_op.reshape: (16x12x12x512xf32) <- (4x2x2x12x12x512xf32, 4xi64) + reshape_184 = paddle._C_ops.reshape(transpose_127, full_int_array_57) + + # pd_op.reshape: (16x144x512xf32) <- (16x12x12x512xf32, 3xi64) + reshape_185 = paddle._C_ops.reshape(reshape_184, full_int_array_58) + del full_int_array_58 + + # pd_op.full: (1x24x24x1xf32) <- () + full_44 = paddle._C_ops.full( + [1, 24, 24, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__92 = paddle._C_ops.set_value_( + full_44, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_44 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__93 = paddle._C_ops.set_value_( + set_value__92, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__92 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__94 = paddle._C_ops.set_value_( + set_value__93, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__93 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__95 = paddle._C_ops.set_value_( + set_value__94, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__94 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__96 = paddle._C_ops.set_value_( + set_value__95, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__95 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__97 = paddle._C_ops.set_value_( + set_value__96, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__96 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__98 = paddle._C_ops.set_value_( + set_value__97, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__97 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__99 = paddle._C_ops.set_value_( + set_value__98, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__98 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__10 = paddle._C_ops.set_value_( + set_value__99, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__99 + + # pd_op.reshape: (1x2x12x2x12x1xf32) <- (1x24x24x1xf32, 6xi64) + reshape_338 = paddle._C_ops.reshape(set_value__10, full_int_array_64) + del full_int_array_64 + + # pd_op.transpose: (1x2x2x12x12x1xf32) <- (1x2x12x2x12x1xf32) + transpose_156 = paddle._C_ops.transpose(reshape_338, [0, 1, 3, 2, 4, 5]) + del reshape_338 + + # pd_op.reshape: (4x12x12x1xf32) <- (1x2x2x12x12x1xf32, 4xi64) + reshape_339 = paddle._C_ops.reshape(transpose_156, full_int_array_36) + del transpose_156 + + # pd_op.reshape: (4x144xf32) <- (4x12x12x1xf32, 2xi64) + reshape_340 = paddle._C_ops.reshape(reshape_339, full_int_array_37) + del reshape_339 + + # pd_op.unsqueeze: (4x1x144xf32) <- (4x144xf32, 1xi64) + unsqueeze_67 = paddle._C_ops.unsqueeze(reshape_340, full_int_array_1) + + # pd_op.unsqueeze: (4x144x1xf32) <- (4x144xf32, 1xi64) + unsqueeze_68 = paddle._C_ops.unsqueeze(reshape_340, full_int_array_2) + del reshape_340 + + # pd_op.subtract: (4x144x144xf32) <- (4x1x144xf32, 4x144x1xf32) + subtract_10 = paddle._C_ops.subtract(unsqueeze_67, unsqueeze_68) + del unsqueeze_67, unsqueeze_68 + + # pd_op.not_equal: (4x144x144xb) <- (4x144x144xf32, xf32) + not_equal_10 = paddle._C_ops.not_equal(subtract_10, full_26) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_20 = paddle._C_ops.where(not_equal_10, full_35, subtract_10) + del full_35, not_equal_10, subtract_10 + + # pd_op.equal: (4x144x144xb) <- (4x144x144xf32, xf32) + equal_10 = paddle._C_ops.equal(where_20, full_26) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_21 = paddle._C_ops.where(equal_10, full_36, where_20) + del equal_10, full_36, where_20 + + # pd_op.matmul: (16x144x1536xf32) <- (16x144x512xf32, 512x1536xf32) + matmul_107 = paddle._C_ops.matmul(reshape_185, parameter_40, False, False) + del parameter_40 + + # pd_op.add: (16x144x1536xf32) <- (16x144x1536xf32, 1536xf32) + add_147 = paddle._C_ops.add(matmul_107, parameter_39) + del parameter_39 + + # pd_op.reshape: (16x144x3x16x32xf32) <- (16x144x1536xf32, 5xi64) + reshape_341 = paddle._C_ops.reshape(add_147, full_int_array_59) + del full_int_array_59 + + # pd_op.transpose: (3x16x16x144x32xf32) <- (16x144x3x16x32xf32) + transpose_128 = paddle._C_ops.transpose(reshape_341, [2, 0, 3, 1, 4]) + del reshape_341 + + # pd_op.slice: (16x16x144x32xf32) <- (3x16x16x144x32xf32, 1xi64, 1xi64) + slice_66 = paddle._C_ops.slice( + transpose_128, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (16x16x144x32xf32) <- (3x16x16x144x32xf32, 1xi64, 1xi64) + slice_67 = paddle._C_ops.slice( + transpose_128, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (16x16x144x32xf32) <- (3x16x16x144x32xf32, 1xi64, 1xi64) + slice_21 = paddle._C_ops.slice( + transpose_128, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (16x16x144x32xf32) <- (16x16x144x32xf32, 1xf32) + scale_21 = paddle._C_ops.scale(slice_66, full_0, float("0"), True) + del slice_66 + + # pd_op.transpose: (16x16x32x144xf32) <- (16x16x144x32xf32) + transpose_129 = paddle._C_ops.transpose(slice_67, [0, 1, 3, 2]) + del slice_67 + + # pd_op.matmul: (16x16x144x144xf32) <- (16x16x144x32xf32, 16x16x32x144xf32) + matmul_108 = paddle._C_ops.matmul(scale_21, transpose_129, False, False) + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_186 = paddle._C_ops.reshape(data_15, full_int_array_15) + del data_15 + + # pd_op.index_select: (20736x16xf32) <- (529x16xf32, 20736xi64) + index_select_21 = paddle._C_ops.index_select(data_16, reshape_186, 0) + del data_16 + + # pd_op.reshape: (144x144x16xf32) <- (20736x16xf32, 3xi64) + reshape_342 = paddle._C_ops.reshape(index_select_21, full_int_array_16) + + # pd_op.transpose: (16x144x144xf32) <- (144x144x16xf32) + transpose_130 = paddle._C_ops.transpose(reshape_342, [2, 0, 1]) + del reshape_342 + + # pd_op.unsqueeze: (1x16x144x144xf32) <- (16x144x144xf32, 1xi64) + unsqueeze_31 = paddle._C_ops.unsqueeze(transpose_130, full_int_array_0) + + # pd_op.add: (16x16x144x144xf32) <- (16x16x144x144xf32, 1x16x144x144xf32) + add_148 = paddle._C_ops.add(matmul_108, unsqueeze_31) + + # pd_op.reshape: (4x4x16x144x144xf32) <- (16x16x144x144xf32, 5xi64) + reshape_187 = paddle._C_ops.reshape(add_148, full_int_array_65) + del full_int_array_65 + + # pd_op.unsqueeze: (4x1x144x144xf32) <- (4x144x144xf32, 1xi64) + unsqueeze_69 = paddle._C_ops.unsqueeze(where_21, full_int_array_1) + del where_21 + + # pd_op.unsqueeze: (1x4x1x144x144xf32) <- (4x1x144x144xf32, 1xi64) + unsqueeze_32 = paddle._C_ops.unsqueeze(unsqueeze_69, full_int_array_0) + del unsqueeze_69 + + # pd_op.add: (4x4x16x144x144xf32) <- (4x4x16x144x144xf32, 1x4x1x144x144xf32) + add_149 = paddle._C_ops.add(reshape_187, unsqueeze_32) + + # pd_op.reshape: (16x16x144x144xf32) <- (4x4x16x144x144xf32, 4xi64) + reshape_343 = paddle._C_ops.reshape(add_149, full_int_array_66) + del full_int_array_66 + + # pd_op.softmax: (16x16x144x144xf32) <- (16x16x144x144xf32) + softmax_21 = paddle._C_ops.softmax(reshape_343, -1) + del reshape_343 + + # pd_op.matmul: (16x16x144x32xf32) <- (16x16x144x144xf32, 16x16x144x32xf32) + matmul_145 = paddle._C_ops.matmul(softmax_21, slice_21, False, False) + + # pd_op.transpose: (16x144x16x32xf32) <- (16x16x144x32xf32) + transpose_131 = paddle._C_ops.transpose(matmul_145, [0, 2, 1, 3]) + del matmul_145 + + # pd_op.reshape: (16x144x512xf32) <- (16x144x16x32xf32, 3xi64) + reshape_188 = paddle._C_ops.reshape(transpose_131, full_int_array_60) + del full_int_array_60 + + # pd_op.matmul: (16x144x512xf32) <- (16x144x512xf32, 512x512xf32) + matmul_109 = paddle._C_ops.matmul(reshape_188, parameter_38, False, False) + del parameter_38 + + # pd_op.add: (16x144x512xf32) <- (16x144x512xf32, 512xf32) + add_150 = paddle._C_ops.add(matmul_109, parameter_37) + del parameter_37 + + # pd_op.reshape: (16x12x12x512xf32) <- (16x144x512xf32, 4xi64) + reshape_189 = paddle._C_ops.reshape(add_150, full_int_array_57) + del full_int_array_57 + + # pd_op.reshape: (4x2x2x12x12x512xf32) <- (16x12x12x512xf32, 6xi64) + reshape_344 = paddle._C_ops.reshape(reshape_189, full_int_array_61) + del full_int_array_61 + + # pd_op.transpose: (4x2x12x2x12x512xf32) <- (4x2x2x12x12x512xf32) + transpose_132 = paddle._C_ops.transpose(reshape_344, [0, 1, 3, 2, 4, 5]) + del reshape_344 + + # pd_op.reshape: (4x24x24x512xf32) <- (4x2x12x2x12x512xf32, 4xi64) + reshape_190 = paddle._C_ops.reshape(transpose_132, full_int_array_62) + del full_int_array_62 + + # pd_op.roll: (4x24x24x512xf32) <- (4x24x24x512xf32, 2xi64) + roll_21 = paddle._C_ops.roll(reshape_190, full_int_array_5, [1, 2]) + + # pd_op.reshape: (4x576x512xf32) <- (4x24x24x512xf32, 3xi64) + reshape_191 = paddle._C_ops.reshape(roll_21, full_int_array_63) + del full_int_array_63 + + # pd_op.full: (xf32) <- () + full_22 = paddle._C_ops.full( + [], + float("0.543478"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_233 = full_22 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_40 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_221 = paddle._C_ops.add(full_22, uniform_40) + del uniform_40 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_40 = paddle._C_ops.floor(add_221) + del add_221 + + # pd_op.divide: (4x576x512xf32) <- (4x576x512xf32, xf32) + divide_40 = paddle._C_ops.divide(reshape_191, full_22) + + # pd_op.multiply: (4x576x512xf32) <- (4x576x512xf32, 4x1x1xf32) + multiply_40 = paddle._C_ops.multiply(divide_40, floor_40) + + # pd_op.add: (4x576x512xf32) <- (4x576x512xf32, 4x576x512xf32) + add_151 = paddle._C_ops.add(add_146, multiply_40) + + # pd_op.layer_norm: (4x576x512xf32, 4x576xf32, 4x576xf32) <- (4x576x512xf32, 512xf32, 512xf32) + layer_norm_138, layer_norm_139, layer_norm_140 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_151, parameter_36, parameter_35, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_35, parameter_36 + + # pd_op.matmul: (4x576x2048xf32) <- (4x576x512xf32, 512x2048xf32) + matmul_110 = paddle._C_ops.matmul(layer_norm_138, parameter_34, False, False) + del parameter_34 + + # pd_op.add: (4x576x2048xf32) <- (4x576x2048xf32, 2048xf32) + add_152 = paddle._C_ops.add(matmul_110, parameter_33) + del parameter_33 + + # pd_op.gelu: (4x576x2048xf32) <- (4x576x2048xf32) + gelu_21 = paddle._C_ops.gelu(add_152, False) + + # pd_op.matmul: (4x576x512xf32) <- (4x576x2048xf32, 2048x512xf32) + matmul_111 = paddle._C_ops.matmul(gelu_21, parameter_32, False, False) + del parameter_32 + + # pd_op.add: (4x576x512xf32) <- (4x576x512xf32, 512xf32) + add_153 = paddle._C_ops.add(matmul_111, parameter_31) + del parameter_31 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_41 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_222 = paddle._C_ops.add(full_22, uniform_41) + del uniform_41 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_41 = paddle._C_ops.floor(add_222) + del add_222 + + # pd_op.divide: (4x576x512xf32) <- (4x576x512xf32, xf32) + divide_41 = paddle._C_ops.divide(add_153, full_22) + + # pd_op.multiply: (4x576x512xf32) <- (4x576x512xf32, 4x1x1xf32) + multiply_41 = paddle._C_ops.multiply(divide_41, floor_41) + + # pd_op.add: (4x576x512xf32) <- (4x576x512xf32, 4x576x512xf32) + add_154 = paddle._C_ops.add(add_151, multiply_41) + + # pd_op.reshape: (4x24x24x512xf32) <- (4x576x512xf32, 4xi64) + reshape_192 = paddle._C_ops.reshape(add_154, full_int_array_55) + + # pd_op.strided_slice: (4x12x12x512xf32) <- (4x24x24x512xf32, 2xi64, 2xi64, 2xi64) + strided_slice_8 = paddle._C_ops.strided_slice( + reshape_192, [1, 2], full_int_array_21, full_int_array_34, full_int_array_6 + ) + + # pd_op.strided_slice: (4x12x12x512xf32) <- (4x24x24x512xf32, 2xi64, 2xi64, 2xi64) + strided_slice_9 = paddle._C_ops.strided_slice( + reshape_192, [1, 2], full_int_array_7, full_int_array_34, full_int_array_6 + ) + + # pd_op.strided_slice: (4x12x12x512xf32) <- (4x24x24x512xf32, 2xi64, 2xi64, 2xi64) + strided_slice_10 = paddle._C_ops.strided_slice( + reshape_192, [1, 2], full_int_array_8, full_int_array_34, full_int_array_6 + ) + + # pd_op.strided_slice: (4x12x12x512xf32) <- (4x24x24x512xf32, 2xi64, 2xi64, 2xi64) + strided_slice_11 = paddle._C_ops.strided_slice( + reshape_192, [1, 2], full_int_array_23, full_int_array_34, full_int_array_6 + ) + + # pd_op.reshape: (4x24x24x512xf32) <- (4x24x24x512xf32, 4xi64) + reshape_345 = paddle._C_ops.reshape(reshape_192, full_int_array_55) + del full_int_array_55 + + # builtin.combine: ([4x12x12x512xf32, 4x12x12x512xf32, 4x12x12x512xf32, 4x12x12x512xf32]) <- (4x12x12x512xf32, 4x12x12x512xf32, 4x12x12x512xf32, 4x12x12x512xf32) + combine_2 = [ + strided_slice_8, + strided_slice_9, + strided_slice_10, + strided_slice_11, + ] + + # pd_op.concat: (4x12x12x2048xf32) <- ([4x12x12x512xf32, 4x12x12x512xf32, 4x12x12x512xf32, 4x12x12x512xf32], 1xi32) + concat_2 = paddle._C_ops.concat(combine_2, full_2) + del combine_2 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_67 = [4, -1, 2048] + + # pd_op.reshape: (4x144x2048xf32) <- (4x12x12x2048xf32, 3xi64) + reshape_193 = paddle._C_ops.reshape(concat_2, full_int_array_67) + del full_int_array_67 + + # pd_op.layer_norm: (4x144x2048xf32, 4x144xf32, 4x144xf32) <- (4x144x2048xf32, 2048xf32, 2048xf32) + layer_norm_141, layer_norm_142, layer_norm_143 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + reshape_193, parameter_30, parameter_29, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_29, parameter_30 + + # pd_op.matmul: (4x144x1024xf32) <- (4x144x2048xf32, 2048x1024xf32) + matmul_112 = paddle._C_ops.matmul(layer_norm_141, parameter_28, False, False) + del parameter_28 + + # pd_op.layer_norm: (4x144x1024xf32, 4x144xf32, 4x144xf32) <- (4x144x1024xf32, 1024xf32, 1024xf32) + layer_norm_144, layer_norm_145, layer_norm_146 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + matmul_112, parameter_27, parameter_26, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_26, parameter_27 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_68 = [4, 12, 12, 1024] + + # pd_op.reshape: (4x12x12x1024xf32) <- (4x144x1024xf32, 4xi64) + reshape_194 = paddle._C_ops.reshape(layer_norm_144, full_int_array_68) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_69 = [4, 1, 12, 1, 12, 1024] + + # pd_op.reshape: (4x1x12x1x12x1024xf32) <- (4x12x12x1024xf32, 6xi64) + reshape_346 = paddle._C_ops.reshape(reshape_194, full_int_array_69) + + # pd_op.transpose: (4x1x1x12x12x1024xf32) <- (4x1x12x1x12x1024xf32) + transpose_133 = paddle._C_ops.transpose(reshape_346, [0, 1, 3, 2, 4, 5]) + del reshape_346 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_70 = [-1, 12, 12, 1024] + + # pd_op.reshape: (4x12x12x1024xf32) <- (4x1x1x12x12x1024xf32, 4xi64) + reshape_195 = paddle._C_ops.reshape(transpose_133, full_int_array_70) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_71 = [-1, 144, 1024] + + # pd_op.reshape: (4x144x1024xf32) <- (4x12x12x1024xf32, 3xi64) + reshape_196 = paddle._C_ops.reshape(reshape_195, full_int_array_71) + + # pd_op.matmul: (4x144x3072xf32) <- (4x144x1024xf32, 1024x3072xf32) + matmul_113 = paddle._C_ops.matmul(reshape_196, parameter_25, False, False) + del parameter_25 + + # pd_op.add: (4x144x3072xf32) <- (4x144x3072xf32, 3072xf32) + add_155 = paddle._C_ops.add(matmul_113, parameter_24) + del parameter_24 + + # pd_op.full_int_array: (5xi64) <- () + full_int_array_72 = [4, 144, 3, 32, 32] + + # pd_op.reshape: (4x144x3x32x32xf32) <- (4x144x3072xf32, 5xi64) + reshape_347 = paddle._C_ops.reshape(add_155, full_int_array_72) + + # pd_op.transpose: (3x4x32x144x32xf32) <- (4x144x3x32x32xf32) + transpose_134 = paddle._C_ops.transpose(reshape_347, [2, 0, 3, 1, 4]) + del reshape_347 + + # pd_op.slice: (4x32x144x32xf32) <- (3x4x32x144x32xf32, 1xi64, 1xi64) + slice_68 = paddle._C_ops.slice( + transpose_134, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (4x32x144x32xf32) <- (3x4x32x144x32xf32, 1xi64, 1xi64) + slice_69 = paddle._C_ops.slice( + transpose_134, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (4x32x144x32xf32) <- (3x4x32x144x32xf32, 1xi64, 1xi64) + slice_22 = paddle._C_ops.slice( + transpose_134, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (4x32x144x32xf32) <- (4x32x144x32xf32, 1xf32) + scale_22 = paddle._C_ops.scale(slice_68, full_0, float("0"), True) + del slice_68 + + # pd_op.transpose: (4x32x32x144xf32) <- (4x32x144x32xf32) + transpose_135 = paddle._C_ops.transpose(slice_69, [0, 1, 3, 2]) + del slice_69 + + # pd_op.matmul: (4x32x144x144xf32) <- (4x32x144x32xf32, 4x32x32x144xf32) + matmul_114 = paddle._C_ops.matmul(scale_22, transpose_135, False, False) + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_197 = paddle._C_ops.reshape(data_18, full_int_array_15) + del data_18 + + # pd_op.index_select: (20736x32xf32) <- (529x32xf32, 20736xi64) + index_select_22 = paddle._C_ops.index_select(data_19, reshape_197, 0) + del data_19 + + # pd_op.reshape: (144x144x32xf32) <- (20736x32xf32, 3xi64) + reshape_348 = paddle._C_ops.reshape(index_select_22, full_int_array_16) + + # pd_op.transpose: (32x144x144xf32) <- (144x144x32xf32) + transpose_136 = paddle._C_ops.transpose(reshape_348, [2, 0, 1]) + del reshape_348 + + # pd_op.unsqueeze: (1x32x144x144xf32) <- (32x144x144xf32, 1xi64) + unsqueeze_33 = paddle._C_ops.unsqueeze(transpose_136, full_int_array_0) + + # pd_op.add: (4x32x144x144xf32) <- (4x32x144x144xf32, 1x32x144x144xf32) + add_223 = paddle._C_ops.add(matmul_114, unsqueeze_33) + + # pd_op.softmax: (4x32x144x144xf32) <- (4x32x144x144xf32) + softmax_22 = paddle._C_ops.softmax(add_223, -1) + del add_223 + + # pd_op.matmul: (4x32x144x32xf32) <- (4x32x144x144xf32, 4x32x144x32xf32) + matmul_146 = paddle._C_ops.matmul(softmax_22, slice_22, False, False) + + # pd_op.transpose: (4x144x32x32xf32) <- (4x32x144x32xf32) + transpose_137 = paddle._C_ops.transpose(matmul_146, [0, 2, 1, 3]) + del matmul_146 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_73 = [4, 144, 1024] + + # pd_op.reshape: (4x144x1024xf32) <- (4x144x32x32xf32, 3xi64) + reshape_198 = paddle._C_ops.reshape(transpose_137, full_int_array_73) + + # pd_op.matmul: (4x144x1024xf32) <- (4x144x1024xf32, 1024x1024xf32) + matmul_115 = paddle._C_ops.matmul(reshape_198, parameter_23, False, False) + del parameter_23 + + # pd_op.add: (4x144x1024xf32) <- (4x144x1024xf32, 1024xf32) + add_156 = paddle._C_ops.add(matmul_115, parameter_22) + del parameter_22 + + # pd_op.reshape: (4x12x12x1024xf32) <- (4x144x1024xf32, 4xi64) + reshape_199 = paddle._C_ops.reshape(add_156, full_int_array_70) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_74 = [-1, 1, 1, 12, 12, 1024] + + # pd_op.reshape: (4x1x1x12x12x1024xf32) <- (4x12x12x1024xf32, 6xi64) + reshape_349 = paddle._C_ops.reshape(reshape_199, full_int_array_74) + + # pd_op.transpose: (4x1x12x1x12x1024xf32) <- (4x1x1x12x12x1024xf32) + transpose_138 = paddle._C_ops.transpose(reshape_349, [0, 1, 3, 2, 4, 5]) + del reshape_349 + + # pd_op.reshape: (4x12x12x1024xf32) <- (4x1x12x1x12x1024xf32, 4xi64) + reshape_200 = paddle._C_ops.reshape(transpose_138, full_int_array_70) + + # pd_op.reshape: (4x144x1024xf32) <- (4x12x12x1024xf32, 3xi64) + reshape_201 = paddle._C_ops.reshape(reshape_200, full_int_array_73) + + # pd_op.full: (xf32) <- () + full_23 = paddle._C_ops.full( + [], + float("0.521739"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_255 = full_23 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_42 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_224 = paddle._C_ops.add(full_23, uniform_42) + del uniform_42 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_42 = paddle._C_ops.floor(add_224) + del add_224 + + # pd_op.divide: (4x144x1024xf32) <- (4x144x1024xf32, xf32) + divide_42 = paddle._C_ops.divide(reshape_201, full_23) + + # pd_op.multiply: (4x144x1024xf32) <- (4x144x1024xf32, 4x1x1xf32) + multiply_42 = paddle._C_ops.multiply(divide_42, floor_42) + + # pd_op.add: (4x144x1024xf32) <- (4x144x1024xf32, 4x144x1024xf32) + add_157 = paddle._C_ops.add(matmul_112, multiply_42) + + # pd_op.layer_norm: (4x144x1024xf32, 4x144xf32, 4x144xf32) <- (4x144x1024xf32, 1024xf32, 1024xf32) + layer_norm_147, layer_norm_148, layer_norm_149 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_157, parameter_21, parameter_20, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_20, parameter_21 + + # pd_op.matmul: (4x144x4096xf32) <- (4x144x1024xf32, 1024x4096xf32) + matmul_116 = paddle._C_ops.matmul(layer_norm_147, parameter_19, False, False) + del parameter_19 + + # pd_op.add: (4x144x4096xf32) <- (4x144x4096xf32, 4096xf32) + add_158 = paddle._C_ops.add(matmul_116, parameter_18) + del parameter_18 + + # pd_op.gelu: (4x144x4096xf32) <- (4x144x4096xf32) + gelu_22 = paddle._C_ops.gelu(add_158, False) + + # pd_op.matmul: (4x144x1024xf32) <- (4x144x4096xf32, 4096x1024xf32) + matmul_117 = paddle._C_ops.matmul(gelu_22, parameter_17, False, False) + del parameter_17 + + # pd_op.add: (4x144x1024xf32) <- (4x144x1024xf32, 1024xf32) + add_159 = paddle._C_ops.add(matmul_117, parameter_16) + del parameter_16 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_43 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_225 = paddle._C_ops.add(full_23, uniform_43) + del uniform_43 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_43 = paddle._C_ops.floor(add_225) + del add_225 + + # pd_op.divide: (4x144x1024xf32) <- (4x144x1024xf32, xf32) + divide_43 = paddle._C_ops.divide(add_159, full_23) + + # pd_op.multiply: (4x144x1024xf32) <- (4x144x1024xf32, 4x1x1xf32) + multiply_43 = paddle._C_ops.multiply(divide_43, floor_43) + + # pd_op.add: (4x144x1024xf32) <- (4x144x1024xf32, 4x144x1024xf32) + add_160 = paddle._C_ops.add(add_157, multiply_43) + + # pd_op.layer_norm: (4x144x1024xf32, 4x144xf32, 4x144xf32) <- (4x144x1024xf32, 1024xf32, 1024xf32) + layer_norm_150, layer_norm_151, layer_norm_152 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_160, parameter_15, parameter_14, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_14, parameter_15 + + # pd_op.reshape: (4x12x12x1024xf32) <- (4x144x1024xf32, 4xi64) + reshape_202 = paddle._C_ops.reshape(layer_norm_150, full_int_array_68) + del full_int_array_68 + + # pd_op.roll: (4x12x12x1024xf32) <- (4x12x12x1024xf32, 2xi64) + roll_22 = paddle._C_ops.roll(reshape_202, full_int_array_4, [1, 2]) + + # pd_op.reshape: (4x1x12x1x12x1024xf32) <- (4x12x12x1024xf32, 6xi64) + reshape_350 = paddle._C_ops.reshape(roll_22, full_int_array_69) + del full_int_array_69 + + # pd_op.transpose: (4x1x1x12x12x1024xf32) <- (4x1x12x1x12x1024xf32) + transpose_139 = paddle._C_ops.transpose(reshape_350, [0, 1, 3, 2, 4, 5]) + del reshape_350 + + # pd_op.reshape: (4x12x12x1024xf32) <- (4x1x1x12x12x1024xf32, 4xi64) + reshape_203 = paddle._C_ops.reshape(transpose_139, full_int_array_70) + + # pd_op.reshape: (4x144x1024xf32) <- (4x12x12x1024xf32, 3xi64) + reshape_204 = paddle._C_ops.reshape(reshape_203, full_int_array_71) + del full_int_array_71 + + # pd_op.full: (1x12x12x1xf32) <- () + full_45 = paddle._C_ops.full( + [1, 12, 12, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x12x12x1xf32) <- (1x12x12x1xf32, 2xi64, 2xi64, 2xi64) + set_value__100 = paddle._C_ops.set_value_( + full_45, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_45, full_int_array_21 + + # pd_op.set_value_: (1x12x12x1xf32) <- (1x12x12x1xf32, 2xi64, 2xi64, 2xi64) + set_value__101 = paddle._C_ops.set_value_( + set_value__100, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del full_int_array_24, set_value__100 + + # pd_op.set_value_: (1x12x12x1xf32) <- (1x12x12x1xf32, 2xi64, 2xi64, 2xi64) + set_value__102 = paddle._C_ops.set_value_( + set_value__101, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del full_int_array_26, full_int_array_27, set_value__101 + + # pd_op.set_value_: (1x12x12x1xf32) <- (1x12x12x1xf32, 2xi64, 2xi64, 2xi64) + set_value__103 = paddle._C_ops.set_value_( + set_value__102, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del full_int_array_28, set_value__102 + + # pd_op.set_value_: (1x12x12x1xf32) <- (1x12x12x1xf32, 2xi64, 2xi64, 2xi64) + set_value__104 = paddle._C_ops.set_value_( + set_value__103, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del full_int_array_22, set_value__103 + + # pd_op.set_value_: (1x12x12x1xf32) <- (1x12x12x1xf32, 2xi64, 2xi64, 2xi64) + set_value__105 = paddle._C_ops.set_value_( + set_value__104, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del full_int_array_25, full_int_array_30, set_value__104 + + # pd_op.set_value_: (1x12x12x1xf32) <- (1x12x12x1xf32, 2xi64, 2xi64, 2xi64) + set_value__106 = paddle._C_ops.set_value_( + set_value__105, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del full_int_array_31, full_int_array_32, set_value__105 + + # pd_op.set_value_: (1x12x12x1xf32) <- (1x12x12x1xf32, 2xi64, 2xi64, 2xi64) + set_value__107 = paddle._C_ops.set_value_( + set_value__106, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del full_int_array_29, full_int_array_33, set_value__106 + + # pd_op.set_value_: (1x12x12x1xf32) <- (1x12x12x1xf32, 2xi64, 2xi64, 2xi64) + set_value__11 = paddle._C_ops.set_value_( + set_value__107, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del full_int_array_34, set_value__107 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_75 = [1, 1, 12, 1, 12, 1] + + # pd_op.reshape: (1x1x12x1x12x1xf32) <- (1x12x12x1xf32, 6xi64) + reshape_351 = paddle._C_ops.reshape(set_value__11, full_int_array_75) + del full_int_array_75 + + # pd_op.transpose: (1x1x1x12x12x1xf32) <- (1x1x12x1x12x1xf32) + transpose_157 = paddle._C_ops.transpose(reshape_351, [0, 1, 3, 2, 4, 5]) + del reshape_351 + + # pd_op.reshape: (1x12x12x1xf32) <- (1x1x1x12x12x1xf32, 4xi64) + reshape_352 = paddle._C_ops.reshape(transpose_157, full_int_array_36) + del full_int_array_36, transpose_157 + + # pd_op.reshape: (1x144xf32) <- (1x12x12x1xf32, 2xi64) + reshape_353 = paddle._C_ops.reshape(reshape_352, full_int_array_37) + del full_int_array_37, reshape_352 + + # pd_op.unsqueeze: (1x1x144xf32) <- (1x144xf32, 1xi64) + unsqueeze_70 = paddle._C_ops.unsqueeze(reshape_353, full_int_array_1) + + # pd_op.unsqueeze: (1x144x1xf32) <- (1x144xf32, 1xi64) + unsqueeze_71 = paddle._C_ops.unsqueeze(reshape_353, full_int_array_2) + del reshape_353 + + # pd_op.subtract: (1x144x144xf32) <- (1x1x144xf32, 1x144x1xf32) + subtract_11 = paddle._C_ops.subtract(unsqueeze_70, unsqueeze_71) + del unsqueeze_70, unsqueeze_71 + + # pd_op.not_equal: (1x144x144xb) <- (1x144x144xf32, xf32) + not_equal_11 = paddle._C_ops.not_equal(subtract_11, full_26) + + # pd_op.full: (1x144x144xf32) <- () + full_46 = paddle._C_ops.full( + [1, 144, 144], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (1x144x144xf32) <- (1x144x144xb, 1x144x144xf32, 1x144x144xf32) + where_22 = paddle._C_ops.where(not_equal_11, full_46, subtract_11) + del full_46, not_equal_11, subtract_11 + + # pd_op.equal: (1x144x144xb) <- (1x144x144xf32, xf32) + equal_11 = paddle._C_ops.equal(where_22, full_26) + del full_26 + + # pd_op.full: (1x144x144xf32) <- () + full_47 = paddle._C_ops.full( + [1, 144, 144], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (1x144x144xf32) <- (1x144x144xb, 1x144x144xf32, 1x144x144xf32) + where_23 = paddle._C_ops.where(equal_11, full_47, where_22) + del equal_11, full_47, where_22 + + # pd_op.matmul: (4x144x3072xf32) <- (4x144x1024xf32, 1024x3072xf32) + matmul_118 = paddle._C_ops.matmul(reshape_204, parameter_13, False, False) + del parameter_13 + + # pd_op.add: (4x144x3072xf32) <- (4x144x3072xf32, 3072xf32) + add_161 = paddle._C_ops.add(matmul_118, parameter_12) + del parameter_12 + + # pd_op.reshape: (4x144x3x32x32xf32) <- (4x144x3072xf32, 5xi64) + reshape_354 = paddle._C_ops.reshape(add_161, full_int_array_72) + del full_int_array_72 + + # pd_op.transpose: (3x4x32x144x32xf32) <- (4x144x3x32x32xf32) + transpose_140 = paddle._C_ops.transpose(reshape_354, [2, 0, 3, 1, 4]) + del reshape_354 + + # pd_op.slice: (4x32x144x32xf32) <- (3x4x32x144x32xf32, 1xi64, 1xi64) + slice_70 = paddle._C_ops.slice( + transpose_140, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (4x32x144x32xf32) <- (3x4x32x144x32xf32, 1xi64, 1xi64) + slice_71 = paddle._C_ops.slice( + transpose_140, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (4x32x144x32xf32) <- (3x4x32x144x32xf32, 1xi64, 1xi64) + slice_23 = paddle._C_ops.slice( + transpose_140, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (4x32x144x32xf32) <- (4x32x144x32xf32, 1xf32) + scale_23 = paddle._C_ops.scale(slice_70, full_0, float("0"), True) + del slice_70 + + # pd_op.transpose: (4x32x32x144xf32) <- (4x32x144x32xf32) + transpose_141 = paddle._C_ops.transpose(slice_71, [0, 1, 3, 2]) + del slice_71 + + # pd_op.matmul: (4x32x144x144xf32) <- (4x32x144x32xf32, 4x32x32x144xf32) + matmul_119 = paddle._C_ops.matmul(scale_23, transpose_141, False, False) + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_205 = paddle._C_ops.reshape(data_21, full_int_array_15) + del data_21, full_int_array_15 + + # pd_op.index_select: (20736x32xf32) <- (529x32xf32, 20736xi64) + index_select_23 = paddle._C_ops.index_select(data_22, reshape_205, 0) + del data_22 + + # pd_op.reshape: (144x144x32xf32) <- (20736x32xf32, 3xi64) + reshape_355 = paddle._C_ops.reshape(index_select_23, full_int_array_16) + del full_int_array_16 + + # pd_op.transpose: (32x144x144xf32) <- (144x144x32xf32) + transpose_142 = paddle._C_ops.transpose(reshape_355, [2, 0, 1]) + del reshape_355 + + # pd_op.unsqueeze: (1x32x144x144xf32) <- (32x144x144xf32, 1xi64) + unsqueeze_34 = paddle._C_ops.unsqueeze(transpose_142, full_int_array_0) + + # pd_op.add: (4x32x144x144xf32) <- (4x32x144x144xf32, 1x32x144x144xf32) + add_162 = paddle._C_ops.add(matmul_119, unsqueeze_34) + + # pd_op.full_int_array: (5xi64) <- () + full_int_array_76 = [4, 1, 32, 144, 144] + + # pd_op.reshape: (4x1x32x144x144xf32) <- (4x32x144x144xf32, 5xi64) + reshape_206 = paddle._C_ops.reshape(add_162, full_int_array_76) + del full_int_array_76 + + # pd_op.unsqueeze: (1x1x144x144xf32) <- (1x144x144xf32, 1xi64) + unsqueeze_72 = paddle._C_ops.unsqueeze(where_23, full_int_array_1) + del where_23 + + # pd_op.unsqueeze: (1x1x1x144x144xf32) <- (1x1x144x144xf32, 1xi64) + unsqueeze_35 = paddle._C_ops.unsqueeze(unsqueeze_72, full_int_array_0) + del unsqueeze_72 + + # pd_op.add: (4x1x32x144x144xf32) <- (4x1x32x144x144xf32, 1x1x1x144x144xf32) + add_163 = paddle._C_ops.add(reshape_206, unsqueeze_35) + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_77 = [4, 32, 144, 144] + + # pd_op.reshape: (4x32x144x144xf32) <- (4x1x32x144x144xf32, 4xi64) + reshape_356 = paddle._C_ops.reshape(add_163, full_int_array_77) + del full_int_array_77 + + # pd_op.softmax: (4x32x144x144xf32) <- (4x32x144x144xf32) + softmax_23 = paddle._C_ops.softmax(reshape_356, -1) + del reshape_356 + + # pd_op.matmul: (4x32x144x32xf32) <- (4x32x144x144xf32, 4x32x144x32xf32) + matmul_147 = paddle._C_ops.matmul(softmax_23, slice_23, False, False) + + # pd_op.transpose: (4x144x32x32xf32) <- (4x32x144x32xf32) + transpose_143 = paddle._C_ops.transpose(matmul_147, [0, 2, 1, 3]) + del matmul_147 + + # pd_op.reshape: (4x144x1024xf32) <- (4x144x32x32xf32, 3xi64) + reshape_207 = paddle._C_ops.reshape(transpose_143, full_int_array_73) + + # pd_op.matmul: (4x144x1024xf32) <- (4x144x1024xf32, 1024x1024xf32) + matmul_120 = paddle._C_ops.matmul(reshape_207, parameter_11, False, False) + del parameter_11 + + # pd_op.add: (4x144x1024xf32) <- (4x144x1024xf32, 1024xf32) + add_164 = paddle._C_ops.add(matmul_120, parameter_10) + del parameter_10 + + # pd_op.reshape: (4x12x12x1024xf32) <- (4x144x1024xf32, 4xi64) + reshape_208 = paddle._C_ops.reshape(add_164, full_int_array_70) + + # pd_op.reshape: (4x1x1x12x12x1024xf32) <- (4x12x12x1024xf32, 6xi64) + reshape_357 = paddle._C_ops.reshape(reshape_208, full_int_array_74) + del full_int_array_74 + + # pd_op.transpose: (4x1x12x1x12x1024xf32) <- (4x1x1x12x12x1024xf32) + transpose_144 = paddle._C_ops.transpose(reshape_357, [0, 1, 3, 2, 4, 5]) + del reshape_357 + + # pd_op.reshape: (4x12x12x1024xf32) <- (4x1x12x1x12x1024xf32, 4xi64) + reshape_209 = paddle._C_ops.reshape(transpose_144, full_int_array_70) + del full_int_array_70 + + # pd_op.roll: (4x12x12x1024xf32) <- (4x12x12x1024xf32, 2xi64) + roll_23 = paddle._C_ops.roll(reshape_209, full_int_array_5, [1, 2]) + + # pd_op.reshape: (4x144x1024xf32) <- (4x12x12x1024xf32, 3xi64) + reshape_210 = paddle._C_ops.reshape(roll_23, full_int_array_73) + del full_int_array_73 + + # pd_op.full: (xf32) <- () + full_24 = paddle._C_ops.full( + [], float("0.5"), paddle.float32, paddle.framework._current_expected_place() + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_266 = full_24 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_44 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_226 = paddle._C_ops.add(full_24, uniform_44) + del uniform_44 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_44 = paddle._C_ops.floor(add_226) + del add_226 + + # pd_op.divide: (4x144x1024xf32) <- (4x144x1024xf32, xf32) + divide_44 = paddle._C_ops.divide(reshape_210, full_24) + + # pd_op.multiply: (4x144x1024xf32) <- (4x144x1024xf32, 4x1x1xf32) + multiply_44 = paddle._C_ops.multiply(divide_44, floor_44) + + # pd_op.add: (4x144x1024xf32) <- (4x144x1024xf32, 4x144x1024xf32) + add_165 = paddle._C_ops.add(add_160, multiply_44) + + # pd_op.layer_norm: (4x144x1024xf32, 4x144xf32, 4x144xf32) <- (4x144x1024xf32, 1024xf32, 1024xf32) + layer_norm_153, layer_norm_154, layer_norm_155 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_165, parameter_9, parameter_8, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_8, parameter_9 + + # pd_op.matmul: (4x144x4096xf32) <- (4x144x1024xf32, 1024x4096xf32) + matmul_121 = paddle._C_ops.matmul(layer_norm_153, parameter_7, False, False) + del parameter_7 + + # pd_op.add: (4x144x4096xf32) <- (4x144x4096xf32, 4096xf32) + add_166 = paddle._C_ops.add(matmul_121, parameter_6) + del parameter_6 + + # pd_op.gelu: (4x144x4096xf32) <- (4x144x4096xf32) + gelu_23 = paddle._C_ops.gelu(add_166, False) + + # pd_op.matmul: (4x144x1024xf32) <- (4x144x4096xf32, 4096x1024xf32) + matmul_122 = paddle._C_ops.matmul(gelu_23, parameter_5, False, False) + del parameter_5 + + # pd_op.add: (4x144x1024xf32) <- (4x144x1024xf32, 1024xf32) + add_167 = paddle._C_ops.add(matmul_122, parameter_4) + del parameter_4 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_45 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + del full_29, full_30, full_int_array_40 + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_227 = paddle._C_ops.add(full_24, uniform_45) + del uniform_45 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_45 = paddle._C_ops.floor(add_227) + del add_227 + + # pd_op.divide: (4x144x1024xf32) <- (4x144x1024xf32, xf32) + divide_45 = paddle._C_ops.divide(add_167, full_24) + + # pd_op.multiply: (4x144x1024xf32) <- (4x144x1024xf32, 4x1x1xf32) + multiply_45 = paddle._C_ops.multiply(divide_45, floor_45) + + # pd_op.add: (4x144x1024xf32) <- (4x144x1024xf32, 4x144x1024xf32) + add_168 = paddle._C_ops.add(add_165, multiply_45) + + # pd_op.layer_norm: (4x144x1024xf32, 4x144xf32, 4x144xf32) <- (4x144x1024xf32, 1024xf32, 1024xf32) + layer_norm_158, layer_norm_156, layer_norm_157 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_168, parameter_3, parameter_2, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_2, parameter_3 + + # pd_op.transpose: (4x1024x144xf32) <- (4x144x1024xf32) + transpose_145 = paddle._C_ops.transpose(layer_norm_158, [0, 2, 1]) + del layer_norm_158 + + # pd_op.unsqueeze: (4x1024x1x144xf32) <- (4x1024x144xf32, 1xi64) + unsqueeze_36 = paddle._C_ops.unsqueeze(transpose_145, full_int_array_2) + + # pd_op.pool2d: (4x1024x1x1xf32) <- (4x1024x1x144xf32, 2xi64) + pool2d_0 = paddle._C_ops.pool2d( + unsqueeze_36, + full_int_array_23, + [1, 1], + [0, 0], + False, + True, + "NCHW", + "avg", + False, + True, + "EXPLICIT", + ) + del full_int_array_23 + + # pd_op.squeeze: (4x1024x1xf32) <- (4x1024x1x1xf32, 1xi64) + squeeze_0 = paddle._C_ops.squeeze(pool2d_0, full_int_array_2) + + # pd_op.flatten: (4x1024xf32) <- (4x1024x1xf32) + flatten_0 = paddle._C_ops.flatten(squeeze_0, 1, 2) + + # pd_op.matmul: (4x102xf32) <- (4x1024xf32, 1024x102xf32) + matmul_123 = paddle._C_ops.matmul(flatten_0, parameter_1, False, False) + del parameter_1 + + # pd_op.add: (4x102xf32) <- (4x102xf32, 102xf32) + add_169 = paddle._C_ops.add(matmul_123, parameter_0) + del ( + assign_0, + assign_1, + assign_10, + assign_101, + assign_103, + assign_104, + assign_105, + assign_106, + assign_107, + assign_108, + assign_109, + assign_111, + assign_112, + assign_114, + assign_115, + assign_116, + assign_117, + assign_118, + assign_119, + assign_12, + assign_121, + assign_123, + assign_124, + assign_125, + assign_126, + assign_127, + assign_128, + assign_129, + assign_13, + assign_131, + assign_132, + assign_134, + assign_135, + assign_136, + assign_137, + assign_138, + assign_139, + assign_14, + assign_141, + assign_143, + assign_144, + assign_145, + assign_146, + assign_147, + assign_148, + assign_149, + assign_15, + assign_151, + assign_152, + assign_154, + assign_155, + assign_156, + assign_157, + assign_158, + assign_159, + assign_16, + assign_161, + assign_163, + assign_164, + assign_165, + assign_166, + assign_167, + assign_168, + assign_169, + assign_17, + assign_171, + assign_172, + assign_174, + assign_175, + assign_176, + assign_177, + assign_178, + assign_179, + assign_18, + assign_181, + assign_183, + assign_184, + assign_185, + assign_186, + assign_187, + assign_188, + assign_189, + assign_19, + assign_191, + assign_192, + assign_194, + assign_195, + assign_196, + assign_197, + assign_198, + assign_199, + assign_2, + assign_20, + assign_201, + assign_203, + assign_204, + assign_205, + assign_206, + assign_207, + assign_208, + assign_209, + assign_21, + assign_211, + assign_212, + assign_214, + assign_215, + assign_216, + assign_217, + assign_218, + assign_219, + assign_22, + assign_221, + assign_223, + assign_224, + assign_225, + assign_226, + assign_227, + assign_228, + assign_229, + assign_23, + assign_231, + assign_232, + assign_234, + assign_235, + assign_236, + assign_237, + assign_238, + assign_239, + assign_24, + assign_240, + assign_241, + assign_242, + assign_243, + assign_244, + assign_245, + assign_247, + assign_248, + assign_249, + assign_25, + assign_250, + assign_251, + assign_252, + assign_254, + assign_256, + assign_257, + assign_258, + assign_259, + assign_26, + assign_260, + assign_261, + assign_262, + assign_264, + assign_265, + assign_267, + assign_268, + assign_269, + assign_28, + assign_3, + assign_30, + assign_31, + assign_32, + assign_33, + assign_34, + assign_35, + assign_36, + assign_38, + assign_39, + assign_4, + assign_41, + assign_42, + assign_43, + assign_44, + assign_45, + assign_46, + assign_47, + assign_48, + assign_49, + assign_5, + assign_50, + assign_51, + assign_52, + assign_54, + assign_55, + assign_56, + assign_57, + assign_58, + assign_59, + assign_6, + assign_61, + assign_63, + assign_64, + assign_65, + assign_66, + assign_67, + assign_68, + assign_69, + assign_7, + assign_71, + assign_72, + assign_74, + assign_75, + assign_76, + assign_77, + assign_78, + assign_79, + assign_8, + assign_81, + assign_83, + assign_84, + assign_85, + assign_86, + assign_87, + assign_88, + assign_89, + assign_91, + assign_92, + assign_94, + assign_95, + assign_96, + assign_97, + assign_98, + assign_99, + full_int_array_0, + full_int_array_1, + full_int_array_2, + full_int_array_3, + full_int_array_4, + full_int_array_5, + full_int_array_6, + full_int_array_7, + full_int_array_8, + parameter_0, + ) + + return ( + conv2d_0, + reshape_0, + add_0, + transpose_0, + layer_norm_0, + layer_norm_1, + layer_norm_2, + layer_norm_3, + layer_norm_4, + layer_norm_5, + reshape_1, + transpose_1, + reshape_2, + reshape_3, + matmul_0, + add_1, + transpose_2, + slice_0, + full_0, + scale_0, + transpose_3, + matmul_1, + reshape_4, + index_select_0, + transpose_4, + unsqueeze_0, + softmax_0, + transpose_5, + reshape_5, + matmul_2, + add_2, + reshape_6, + transpose_6, + reshape_7, + reshape_8, + add_3, + layer_norm_6, + layer_norm_7, + layer_norm_8, + matmul_3, + add_4, + gelu_0, + matmul_4, + add_5, + add_6, + layer_norm_9, + layer_norm_10, + layer_norm_11, + reshape_9, + roll_0, + transpose_7, + reshape_10, + reshape_11, + matmul_5, + add_7, + transpose_8, + slice_1, + assign_9, + scale_1, + transpose_9, + matmul_6, + reshape_12, + index_select_1, + transpose_10, + unsqueeze_1, + add_8, + reshape_13, + unsqueeze_2, + add_9, + softmax_1, + transpose_11, + reshape_14, + matmul_7, + add_10, + reshape_15, + transpose_12, + reshape_16, + roll_1, + reshape_17, + full_1, + floor_0, + divide_0, + multiply_0, + add_11, + layer_norm_12, + layer_norm_13, + layer_norm_14, + matmul_8, + add_12, + gelu_1, + matmul_9, + add_13, + assign_11, + floor_1, + divide_1, + multiply_1, + add_14, + reshape_18, + strided_slice_0, + strided_slice_1, + strided_slice_2, + strided_slice_3, + full_2, + concat_0, + reshape_19, + layer_norm_15, + layer_norm_16, + layer_norm_17, + matmul_10, + layer_norm_18, + layer_norm_19, + layer_norm_20, + reshape_20, + transpose_13, + reshape_21, + reshape_22, + matmul_11, + add_15, + transpose_14, + slice_2, + assign_27, + scale_2, + transpose_15, + matmul_12, + reshape_23, + index_select_2, + transpose_16, + unsqueeze_3, + softmax_2, + transpose_17, + reshape_24, + matmul_13, + add_16, + reshape_25, + transpose_18, + reshape_26, + reshape_27, + full_3, + floor_2, + divide_2, + multiply_2, + add_17, + layer_norm_21, + layer_norm_22, + layer_norm_23, + matmul_14, + add_18, + gelu_2, + matmul_15, + add_19, + assign_29, + floor_3, + divide_3, + multiply_3, + add_20, + layer_norm_24, + layer_norm_25, + layer_norm_26, + reshape_28, + roll_2, + transpose_19, + reshape_29, + reshape_30, + matmul_16, + add_21, + transpose_20, + slice_3, + assign_37, + scale_3, + transpose_21, + matmul_17, + reshape_31, + index_select_3, + transpose_22, + unsqueeze_4, + add_22, + reshape_32, + unsqueeze_5, + add_23, + softmax_3, + transpose_23, + reshape_33, + matmul_18, + add_24, + reshape_34, + transpose_24, + reshape_35, + roll_3, + reshape_36, + full_4, + floor_4, + divide_4, + multiply_4, + add_25, + layer_norm_27, + layer_norm_28, + layer_norm_29, + matmul_19, + add_26, + gelu_3, + matmul_20, + add_27, + assign_40, + floor_5, + divide_5, + multiply_5, + add_28, + reshape_37, + strided_slice_4, + strided_slice_5, + strided_slice_6, + strided_slice_7, + assign_53, + concat_1, + reshape_38, + layer_norm_30, + layer_norm_31, + layer_norm_32, + matmul_21, + layer_norm_33, + layer_norm_34, + layer_norm_35, + reshape_39, + transpose_25, + reshape_40, + reshape_41, + matmul_22, + add_29, + transpose_26, + slice_4, + assign_60, + scale_4, + transpose_27, + matmul_23, + reshape_42, + index_select_4, + transpose_28, + unsqueeze_6, + softmax_4, + transpose_29, + reshape_43, + matmul_24, + add_30, + reshape_44, + transpose_30, + reshape_45, + reshape_46, + full_5, + floor_6, + divide_6, + multiply_6, + add_31, + layer_norm_36, + layer_norm_37, + layer_norm_38, + matmul_25, + add_32, + gelu_4, + matmul_26, + add_33, + assign_62, + floor_7, + divide_7, + multiply_7, + add_34, + layer_norm_39, + layer_norm_40, + layer_norm_41, + reshape_47, + roll_4, + transpose_31, + reshape_48, + reshape_49, + matmul_27, + add_35, + transpose_32, + slice_5, + assign_70, + scale_5, + transpose_33, + matmul_28, + reshape_50, + index_select_5, + transpose_34, + unsqueeze_7, + add_36, + reshape_51, + unsqueeze_8, + add_37, + softmax_5, + transpose_35, + reshape_52, + matmul_29, + add_38, + reshape_53, + transpose_36, + reshape_54, + roll_5, + reshape_55, + full_6, + floor_8, + divide_8, + multiply_8, + add_39, + layer_norm_42, + layer_norm_43, + layer_norm_44, + matmul_30, + add_40, + gelu_5, + matmul_31, + add_41, + assign_73, + floor_9, + divide_9, + multiply_9, + add_42, + layer_norm_45, + layer_norm_46, + layer_norm_47, + reshape_56, + transpose_37, + reshape_57, + reshape_58, + matmul_32, + add_43, + transpose_38, + slice_6, + assign_80, + scale_6, + transpose_39, + matmul_33, + reshape_59, + index_select_6, + transpose_40, + unsqueeze_9, + softmax_6, + transpose_41, + reshape_60, + matmul_34, + add_44, + reshape_61, + transpose_42, + reshape_62, + reshape_63, + full_7, + floor_10, + divide_10, + multiply_10, + add_45, + layer_norm_48, + layer_norm_49, + layer_norm_50, + matmul_35, + add_46, + gelu_6, + matmul_36, + add_47, + assign_82, + floor_11, + divide_11, + multiply_11, + add_48, + layer_norm_51, + layer_norm_52, + layer_norm_53, + reshape_64, + roll_6, + transpose_43, + reshape_65, + reshape_66, + matmul_37, + add_49, + transpose_44, + slice_7, + assign_90, + scale_7, + transpose_45, + matmul_38, + reshape_67, + index_select_7, + transpose_46, + unsqueeze_10, + add_50, + reshape_68, + unsqueeze_11, + add_51, + softmax_7, + transpose_47, + reshape_69, + matmul_39, + add_52, + reshape_70, + transpose_48, + reshape_71, + roll_7, + reshape_72, + full_8, + floor_12, + divide_12, + multiply_12, + add_53, + layer_norm_54, + layer_norm_55, + layer_norm_56, + matmul_40, + add_54, + gelu_7, + matmul_41, + add_55, + assign_93, + floor_13, + divide_13, + multiply_13, + add_56, + layer_norm_57, + layer_norm_58, + layer_norm_59, + reshape_73, + transpose_49, + reshape_74, + reshape_75, + matmul_42, + add_57, + transpose_50, + slice_8, + assign_100, + scale_8, + transpose_51, + matmul_43, + reshape_76, + index_select_8, + transpose_52, + unsqueeze_12, + softmax_8, + transpose_53, + reshape_77, + matmul_44, + add_58, + reshape_78, + transpose_54, + reshape_79, + reshape_80, + full_9, + floor_14, + divide_14, + multiply_14, + add_59, + layer_norm_60, + layer_norm_61, + layer_norm_62, + matmul_45, + add_60, + gelu_8, + matmul_46, + add_61, + assign_102, + floor_15, + divide_15, + multiply_15, + add_62, + layer_norm_63, + layer_norm_64, + layer_norm_65, + reshape_81, + roll_8, + transpose_55, + reshape_82, + reshape_83, + matmul_47, + add_63, + transpose_56, + slice_9, + assign_110, + scale_9, + transpose_57, + matmul_48, + reshape_84, + index_select_9, + transpose_58, + unsqueeze_13, + add_64, + reshape_85, + unsqueeze_14, + add_65, + softmax_9, + transpose_59, + reshape_86, + matmul_49, + add_66, + reshape_87, + transpose_60, + reshape_88, + roll_9, + reshape_89, + full_10, + floor_16, + divide_16, + multiply_16, + add_67, + layer_norm_66, + layer_norm_67, + layer_norm_68, + matmul_50, + add_68, + gelu_9, + matmul_51, + add_69, + assign_113, + floor_17, + divide_17, + multiply_17, + add_70, + layer_norm_69, + layer_norm_70, + layer_norm_71, + reshape_90, + transpose_61, + reshape_91, + reshape_92, + matmul_52, + add_71, + transpose_62, + slice_10, + assign_120, + scale_10, + transpose_63, + matmul_53, + reshape_93, + index_select_10, + transpose_64, + unsqueeze_15, + softmax_10, + transpose_65, + reshape_94, + matmul_54, + add_72, + reshape_95, + transpose_66, + reshape_96, + reshape_97, + full_11, + floor_18, + divide_18, + multiply_18, + add_73, + layer_norm_72, + layer_norm_73, + layer_norm_74, + matmul_55, + add_74, + gelu_10, + matmul_56, + add_75, + assign_122, + floor_19, + divide_19, + multiply_19, + add_76, + layer_norm_75, + layer_norm_76, + layer_norm_77, + reshape_98, + roll_10, + transpose_67, + reshape_99, + reshape_100, + matmul_57, + add_77, + transpose_68, + slice_11, + assign_130, + scale_11, + transpose_69, + matmul_58, + reshape_101, + index_select_11, + transpose_70, + unsqueeze_16, + add_78, + reshape_102, + unsqueeze_17, + add_79, + softmax_11, + transpose_71, + reshape_103, + matmul_59, + add_80, + reshape_104, + transpose_72, + reshape_105, + roll_11, + reshape_106, + full_12, + floor_20, + divide_20, + multiply_20, + add_81, + layer_norm_78, + layer_norm_79, + layer_norm_80, + matmul_60, + add_82, + gelu_11, + matmul_61, + add_83, + assign_133, + floor_21, + divide_21, + multiply_21, + add_84, + layer_norm_81, + layer_norm_82, + layer_norm_83, + reshape_107, + transpose_73, + reshape_108, + reshape_109, + matmul_62, + add_85, + transpose_74, + slice_12, + assign_140, + scale_12, + transpose_75, + matmul_63, + reshape_110, + index_select_12, + transpose_76, + unsqueeze_18, + softmax_12, + transpose_77, + reshape_111, + matmul_64, + add_86, + reshape_112, + transpose_78, + reshape_113, + reshape_114, + full_13, + floor_22, + divide_22, + multiply_22, + add_87, + layer_norm_84, + layer_norm_85, + layer_norm_86, + matmul_65, + add_88, + gelu_12, + matmul_66, + add_89, + assign_142, + floor_23, + divide_23, + multiply_23, + add_90, + layer_norm_87, + layer_norm_88, + layer_norm_89, + reshape_115, + roll_12, + transpose_79, + reshape_116, + reshape_117, + matmul_67, + add_91, + transpose_80, + slice_13, + assign_150, + scale_13, + transpose_81, + matmul_68, + reshape_118, + index_select_13, + transpose_82, + unsqueeze_19, + add_92, + reshape_119, + unsqueeze_20, + add_93, + softmax_13, + transpose_83, + reshape_120, + matmul_69, + add_94, + reshape_121, + transpose_84, + reshape_122, + roll_13, + reshape_123, + full_14, + floor_24, + divide_24, + multiply_24, + add_95, + layer_norm_90, + layer_norm_91, + layer_norm_92, + matmul_70, + add_96, + gelu_13, + matmul_71, + add_97, + assign_153, + floor_25, + divide_25, + multiply_25, + add_98, + layer_norm_93, + layer_norm_94, + layer_norm_95, + reshape_124, + transpose_85, + reshape_125, + reshape_126, + matmul_72, + add_99, + transpose_86, + slice_14, + assign_160, + scale_14, + transpose_87, + matmul_73, + reshape_127, + index_select_14, + transpose_88, + unsqueeze_21, + softmax_14, + transpose_89, + reshape_128, + matmul_74, + add_100, + reshape_129, + transpose_90, + reshape_130, + reshape_131, + full_15, + floor_26, + divide_26, + multiply_26, + add_101, + layer_norm_96, + layer_norm_97, + layer_norm_98, + matmul_75, + add_102, + gelu_14, + matmul_76, + add_103, + assign_162, + floor_27, + divide_27, + multiply_27, + add_104, + layer_norm_99, + layer_norm_100, + layer_norm_101, + reshape_132, + roll_14, + transpose_91, + reshape_133, + reshape_134, + matmul_77, + add_105, + transpose_92, + slice_15, + assign_170, + scale_15, + transpose_93, + matmul_78, + reshape_135, + index_select_15, + transpose_94, + unsqueeze_22, + add_106, + reshape_136, + unsqueeze_23, + add_107, + softmax_15, + transpose_95, + reshape_137, + matmul_79, + add_108, + reshape_138, + transpose_96, + reshape_139, + roll_15, + reshape_140, + full_16, + floor_28, + divide_28, + multiply_28, + add_109, + layer_norm_102, + layer_norm_103, + layer_norm_104, + matmul_80, + add_110, + gelu_15, + matmul_81, + add_111, + assign_173, + floor_29, + divide_29, + multiply_29, + add_112, + layer_norm_105, + layer_norm_106, + layer_norm_107, + reshape_141, + transpose_97, + reshape_142, + reshape_143, + matmul_82, + add_113, + transpose_98, + slice_16, + assign_180, + scale_16, + transpose_99, + matmul_83, + reshape_144, + index_select_16, + transpose_100, + unsqueeze_24, + softmax_16, + transpose_101, + reshape_145, + matmul_84, + add_114, + reshape_146, + transpose_102, + reshape_147, + reshape_148, + full_17, + floor_30, + divide_30, + multiply_30, + add_115, + layer_norm_108, + layer_norm_109, + layer_norm_110, + matmul_85, + add_116, + gelu_16, + matmul_86, + add_117, + assign_182, + floor_31, + divide_31, + multiply_31, + add_118, + layer_norm_111, + layer_norm_112, + layer_norm_113, + reshape_149, + roll_16, + transpose_103, + reshape_150, + reshape_151, + matmul_87, + add_119, + transpose_104, + slice_17, + assign_190, + scale_17, + transpose_105, + matmul_88, + reshape_152, + index_select_17, + transpose_106, + unsqueeze_25, + add_120, + reshape_153, + unsqueeze_26, + add_121, + softmax_17, + transpose_107, + reshape_154, + matmul_89, + add_122, + reshape_155, + transpose_108, + reshape_156, + roll_17, + reshape_157, + full_18, + floor_32, + divide_32, + multiply_32, + add_123, + layer_norm_114, + layer_norm_115, + layer_norm_116, + matmul_90, + add_124, + gelu_17, + matmul_91, + add_125, + assign_193, + floor_33, + divide_33, + multiply_33, + add_126, + layer_norm_117, + layer_norm_118, + layer_norm_119, + reshape_158, + transpose_109, + reshape_159, + reshape_160, + matmul_92, + add_127, + transpose_110, + slice_18, + assign_200, + scale_18, + transpose_111, + matmul_93, + reshape_161, + index_select_18, + transpose_112, + unsqueeze_27, + softmax_18, + transpose_113, + reshape_162, + matmul_94, + add_128, + reshape_163, + transpose_114, + reshape_164, + reshape_165, + full_19, + floor_34, + divide_34, + multiply_34, + add_129, + layer_norm_120, + layer_norm_121, + layer_norm_122, + matmul_95, + add_130, + gelu_18, + matmul_96, + add_131, + assign_202, + floor_35, + divide_35, + multiply_35, + add_132, + layer_norm_123, + layer_norm_124, + layer_norm_125, + reshape_166, + roll_18, + transpose_115, + reshape_167, + reshape_168, + matmul_97, + add_133, + transpose_116, + slice_19, + assign_210, + scale_19, + transpose_117, + matmul_98, + reshape_169, + index_select_19, + transpose_118, + unsqueeze_28, + add_134, + reshape_170, + unsqueeze_29, + add_135, + softmax_19, + transpose_119, + reshape_171, + matmul_99, + add_136, + reshape_172, + transpose_120, + reshape_173, + roll_19, + reshape_174, + full_20, + floor_36, + divide_36, + multiply_36, + add_137, + layer_norm_126, + layer_norm_127, + layer_norm_128, + matmul_100, + add_138, + gelu_19, + matmul_101, + add_139, + assign_213, + floor_37, + divide_37, + multiply_37, + add_140, + layer_norm_129, + layer_norm_130, + layer_norm_131, + reshape_175, + transpose_121, + reshape_176, + reshape_177, + matmul_102, + add_141, + transpose_122, + slice_20, + assign_220, + scale_20, + transpose_123, + matmul_103, + reshape_178, + index_select_20, + transpose_124, + unsqueeze_30, + softmax_20, + transpose_125, + reshape_179, + matmul_104, + add_142, + reshape_180, + transpose_126, + reshape_181, + reshape_182, + full_21, + floor_38, + divide_38, + multiply_38, + add_143, + layer_norm_132, + layer_norm_133, + layer_norm_134, + matmul_105, + add_144, + gelu_20, + matmul_106, + add_145, + assign_222, + floor_39, + divide_39, + multiply_39, + add_146, + layer_norm_135, + layer_norm_136, + layer_norm_137, + reshape_183, + roll_20, + transpose_127, + reshape_184, + reshape_185, + matmul_107, + add_147, + transpose_128, + slice_21, + assign_230, + scale_21, + transpose_129, + matmul_108, + reshape_186, + index_select_21, + transpose_130, + unsqueeze_31, + add_148, + reshape_187, + unsqueeze_32, + add_149, + softmax_21, + transpose_131, + reshape_188, + matmul_109, + add_150, + reshape_189, + transpose_132, + reshape_190, + roll_21, + reshape_191, + full_22, + floor_40, + divide_40, + multiply_40, + add_151, + layer_norm_138, + layer_norm_139, + layer_norm_140, + matmul_110, + add_152, + gelu_21, + matmul_111, + add_153, + assign_233, + floor_41, + divide_41, + multiply_41, + add_154, + reshape_192, + strided_slice_8, + strided_slice_9, + strided_slice_10, + strided_slice_11, + assign_246, + concat_2, + reshape_193, + layer_norm_141, + layer_norm_142, + layer_norm_143, + matmul_112, + layer_norm_144, + layer_norm_145, + layer_norm_146, + reshape_194, + transpose_133, + reshape_195, + reshape_196, + matmul_113, + add_155, + transpose_134, + slice_22, + assign_253, + scale_22, + transpose_135, + matmul_114, + reshape_197, + index_select_22, + transpose_136, + unsqueeze_33, + softmax_22, + transpose_137, + reshape_198, + matmul_115, + add_156, + reshape_199, + transpose_138, + reshape_200, + reshape_201, + full_23, + floor_42, + divide_42, + multiply_42, + add_157, + layer_norm_147, + layer_norm_148, + layer_norm_149, + matmul_116, + add_158, + gelu_22, + matmul_117, + add_159, + assign_255, + floor_43, + divide_43, + multiply_43, + add_160, + layer_norm_150, + layer_norm_151, + layer_norm_152, + reshape_202, + roll_22, + transpose_139, + reshape_203, + reshape_204, + matmul_118, + add_161, + transpose_140, + slice_23, + assign_263, + scale_23, + transpose_141, + matmul_119, + reshape_205, + index_select_23, + transpose_142, + unsqueeze_34, + add_162, + reshape_206, + unsqueeze_35, + add_163, + softmax_23, + transpose_143, + reshape_207, + matmul_120, + add_164, + reshape_208, + transpose_144, + reshape_209, + roll_23, + reshape_210, + full_24, + floor_44, + divide_44, + multiply_44, + add_165, + layer_norm_153, + layer_norm_154, + layer_norm_155, + matmul_121, + add_166, + gelu_23, + matmul_122, + add_167, + assign_266, + floor_45, + divide_45, + multiply_45, + add_168, + layer_norm_156, + layer_norm_157, + transpose_145, + unsqueeze_36, + pool2d_0, + squeeze_0, + flatten_0, + matmul_123, + add_169, + set_value__0, + set_value__1, + set_value__2, + set_value__3, + set_value__4, + set_value__5, + set_value__6, + set_value__7, + set_value__8, + set_value__9, + set_value__10, + set_value__11, + ) diff --git a/paddle_samples/vision-model/SwinTransformer_base_patch4_window12_384/subgraph_0/weight_meta.py b/paddle_samples/vision-model/SwinTransformer_base_patch4_window12_384/subgraph_0/weight_meta.py new file mode 100644 index 00000000..69d1deec --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_base_patch4_window12_384/subgraph_0/weight_meta.py @@ -0,0 +1,2743 @@ +class Program_weight_tensor_parameter_0: + name = "parameter_0" + shape = [102] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_1: + name = "parameter_1" + shape = [1024, 102] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_2: + name = "parameter_2" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_3: + name = "parameter_3" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_4: + name = "parameter_4" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_5: + name = "parameter_5" + shape = [4096, 1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_6: + name = "parameter_6" + shape = [4096] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_7: + name = "parameter_7" + shape = [1024, 4096] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_8: + name = "parameter_8" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_9: + name = "parameter_9" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_10: + name = "parameter_10" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_11: + name = "parameter_11" + shape = [1024, 1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_12: + name = "parameter_12" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_13: + name = "parameter_13" + shape = [1024, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_14: + name = "parameter_14" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_15: + name = "parameter_15" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_16: + name = "parameter_16" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_17: + name = "parameter_17" + shape = [4096, 1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_18: + name = "parameter_18" + shape = [4096] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_19: + name = "parameter_19" + shape = [1024, 4096] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_20: + name = "parameter_20" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_21: + name = "parameter_21" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_22: + name = "parameter_22" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_23: + name = "parameter_23" + shape = [1024, 1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_24: + name = "parameter_24" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_25: + name = "parameter_25" + shape = [1024, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_26: + name = "parameter_26" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_27: + name = "parameter_27" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_28: + name = "parameter_28" + shape = [2048, 1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_29: + name = "parameter_29" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_30: + name = "parameter_30" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_31: + name = "parameter_31" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_32: + name = "parameter_32" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_33: + name = "parameter_33" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_34: + name = "parameter_34" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_35: + name = "parameter_35" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_36: + name = "parameter_36" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_37: + name = "parameter_37" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_38: + name = "parameter_38" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_39: + name = "parameter_39" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_40: + name = "parameter_40" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_41: + name = "parameter_41" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_42: + name = "parameter_42" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_43: + name = "parameter_43" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_44: + name = "parameter_44" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_45: + name = "parameter_45" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_46: + name = "parameter_46" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_47: + name = "parameter_47" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_48: + name = "parameter_48" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_49: + name = "parameter_49" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_50: + name = "parameter_50" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_51: + name = "parameter_51" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_52: + name = "parameter_52" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_53: + name = "parameter_53" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_54: + name = "parameter_54" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_55: + name = "parameter_55" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_56: + name = "parameter_56" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_57: + name = "parameter_57" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_58: + name = "parameter_58" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_59: + name = "parameter_59" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_60: + name = "parameter_60" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_61: + name = "parameter_61" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_62: + name = "parameter_62" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_63: + name = "parameter_63" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_64: + name = "parameter_64" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_65: + name = "parameter_65" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_66: + name = "parameter_66" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_67: + name = "parameter_67" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_68: + name = "parameter_68" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_69: + name = "parameter_69" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_70: + name = "parameter_70" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_71: + name = "parameter_71" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_72: + name = "parameter_72" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_73: + name = "parameter_73" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_74: + name = "parameter_74" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_75: + name = "parameter_75" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_76: + name = "parameter_76" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_77: + name = "parameter_77" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_78: + name = "parameter_78" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_79: + name = "parameter_79" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_80: + name = "parameter_80" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_81: + name = "parameter_81" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_82: + name = "parameter_82" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_83: + name = "parameter_83" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_84: + name = "parameter_84" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_85: + name = "parameter_85" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_86: + name = "parameter_86" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_87: + name = "parameter_87" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_88: + name = "parameter_88" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_89: + name = "parameter_89" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_90: + name = "parameter_90" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_91: + name = "parameter_91" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_92: + name = "parameter_92" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_93: + name = "parameter_93" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_94: + name = "parameter_94" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_95: + name = "parameter_95" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_96: + name = "parameter_96" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_97: + name = "parameter_97" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_98: + name = "parameter_98" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_99: + name = "parameter_99" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_100: + name = "parameter_100" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_101: + name = "parameter_101" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_102: + name = "parameter_102" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_103: + name = "parameter_103" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_104: + name = "parameter_104" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_105: + name = "parameter_105" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_106: + name = "parameter_106" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_107: + name = "parameter_107" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_108: + name = "parameter_108" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_109: + name = "parameter_109" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_110: + name = "parameter_110" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_111: + name = "parameter_111" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_112: + name = "parameter_112" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_113: + name = "parameter_113" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_114: + name = "parameter_114" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_115: + name = "parameter_115" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_116: + name = "parameter_116" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_117: + name = "parameter_117" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_118: + name = "parameter_118" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_119: + name = "parameter_119" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_120: + name = "parameter_120" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_121: + name = "parameter_121" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_122: + name = "parameter_122" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_123: + name = "parameter_123" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_124: + name = "parameter_124" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_125: + name = "parameter_125" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_126: + name = "parameter_126" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_127: + name = "parameter_127" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_128: + name = "parameter_128" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_129: + name = "parameter_129" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_130: + name = "parameter_130" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_131: + name = "parameter_131" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_132: + name = "parameter_132" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_133: + name = "parameter_133" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_134: + name = "parameter_134" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_135: + name = "parameter_135" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_136: + name = "parameter_136" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_137: + name = "parameter_137" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_138: + name = "parameter_138" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_139: + name = "parameter_139" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_140: + name = "parameter_140" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_141: + name = "parameter_141" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_142: + name = "parameter_142" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_143: + name = "parameter_143" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_144: + name = "parameter_144" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_145: + name = "parameter_145" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_146: + name = "parameter_146" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_147: + name = "parameter_147" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_148: + name = "parameter_148" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_149: + name = "parameter_149" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_150: + name = "parameter_150" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_151: + name = "parameter_151" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_152: + name = "parameter_152" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_153: + name = "parameter_153" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_154: + name = "parameter_154" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_155: + name = "parameter_155" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_156: + name = "parameter_156" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_157: + name = "parameter_157" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_158: + name = "parameter_158" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_159: + name = "parameter_159" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_160: + name = "parameter_160" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_161: + name = "parameter_161" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_162: + name = "parameter_162" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_163: + name = "parameter_163" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_164: + name = "parameter_164" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_165: + name = "parameter_165" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_166: + name = "parameter_166" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_167: + name = "parameter_167" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_168: + name = "parameter_168" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_169: + name = "parameter_169" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_170: + name = "parameter_170" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_171: + name = "parameter_171" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_172: + name = "parameter_172" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_173: + name = "parameter_173" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_174: + name = "parameter_174" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_175: + name = "parameter_175" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_176: + name = "parameter_176" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_177: + name = "parameter_177" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_178: + name = "parameter_178" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_179: + name = "parameter_179" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_180: + name = "parameter_180" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_181: + name = "parameter_181" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_182: + name = "parameter_182" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_183: + name = "parameter_183" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_184: + name = "parameter_184" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_185: + name = "parameter_185" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_186: + name = "parameter_186" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_187: + name = "parameter_187" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_188: + name = "parameter_188" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_189: + name = "parameter_189" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_190: + name = "parameter_190" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_191: + name = "parameter_191" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_192: + name = "parameter_192" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_193: + name = "parameter_193" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_194: + name = "parameter_194" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_195: + name = "parameter_195" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_196: + name = "parameter_196" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_197: + name = "parameter_197" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_198: + name = "parameter_198" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_199: + name = "parameter_199" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_200: + name = "parameter_200" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_201: + name = "parameter_201" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_202: + name = "parameter_202" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_203: + name = "parameter_203" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_204: + name = "parameter_204" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_205: + name = "parameter_205" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_206: + name = "parameter_206" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_207: + name = "parameter_207" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_208: + name = "parameter_208" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_209: + name = "parameter_209" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_210: + name = "parameter_210" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_211: + name = "parameter_211" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_212: + name = "parameter_212" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_213: + name = "parameter_213" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_214: + name = "parameter_214" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_215: + name = "parameter_215" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_216: + name = "parameter_216" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_217: + name = "parameter_217" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_218: + name = "parameter_218" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_219: + name = "parameter_219" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_220: + name = "parameter_220" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_221: + name = "parameter_221" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_222: + name = "parameter_222" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_223: + name = "parameter_223" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_224: + name = "parameter_224" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_225: + name = "parameter_225" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_226: + name = "parameter_226" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_227: + name = "parameter_227" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_228: + name = "parameter_228" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_229: + name = "parameter_229" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_230: + name = "parameter_230" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_231: + name = "parameter_231" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_232: + name = "parameter_232" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_233: + name = "parameter_233" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_234: + name = "parameter_234" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_235: + name = "parameter_235" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_236: + name = "parameter_236" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_237: + name = "parameter_237" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_238: + name = "parameter_238" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_239: + name = "parameter_239" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_240: + name = "parameter_240" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_241: + name = "parameter_241" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_242: + name = "parameter_242" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_243: + name = "parameter_243" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_244: + name = "parameter_244" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_245: + name = "parameter_245" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_246: + name = "parameter_246" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_247: + name = "parameter_247" + shape = [1024, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_248: + name = "parameter_248" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_249: + name = "parameter_249" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_250: + name = "parameter_250" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_251: + name = "parameter_251" + shape = [1024, 256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_252: + name = "parameter_252" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_253: + name = "parameter_253" + shape = [256, 1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_254: + name = "parameter_254" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_255: + name = "parameter_255" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_256: + name = "parameter_256" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_257: + name = "parameter_257" + shape = [256, 256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_258: + name = "parameter_258" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_259: + name = "parameter_259" + shape = [256, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_260: + name = "parameter_260" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_261: + name = "parameter_261" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_262: + name = "parameter_262" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_263: + name = "parameter_263" + shape = [1024, 256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_264: + name = "parameter_264" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_265: + name = "parameter_265" + shape = [256, 1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_266: + name = "parameter_266" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_267: + name = "parameter_267" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_268: + name = "parameter_268" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_269: + name = "parameter_269" + shape = [256, 256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_270: + name = "parameter_270" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_271: + name = "parameter_271" + shape = [256, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_272: + name = "parameter_272" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_273: + name = "parameter_273" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_274: + name = "parameter_274" + shape = [512, 256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_275: + name = "parameter_275" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_276: + name = "parameter_276" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_277: + name = "parameter_277" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_278: + name = "parameter_278" + shape = [512, 128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_279: + name = "parameter_279" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_280: + name = "parameter_280" + shape = [128, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_281: + name = "parameter_281" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_282: + name = "parameter_282" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_283: + name = "parameter_283" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_284: + name = "parameter_284" + shape = [128, 128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_285: + name = "parameter_285" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_286: + name = "parameter_286" + shape = [128, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_287: + name = "parameter_287" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_288: + name = "parameter_288" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_289: + name = "parameter_289" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_290: + name = "parameter_290" + shape = [512, 128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_291: + name = "parameter_291" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_292: + name = "parameter_292" + shape = [128, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_293: + name = "parameter_293" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_294: + name = "parameter_294" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_295: + name = "parameter_295" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_296: + name = "parameter_296" + shape = [128, 128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_297: + name = "parameter_297" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_298: + name = "parameter_298" + shape = [128, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_299: + name = "parameter_299" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_300: + name = "parameter_300" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_301: + name = "parameter_301" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_302: + name = "parameter_302" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_303: + name = "parameter_303" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_304: + name = "parameter_304" + shape = [128, 3, 4, 4] + dtype = "float32" + low = 0 + high = 0.5 + data = None diff --git a/paddle_samples/vision-model/SwinTransformer_base_patch4_window12_384/subgraph_1/graph_hash.txt b/paddle_samples/vision-model/SwinTransformer_base_patch4_window12_384/subgraph_1/graph_hash.txt new file mode 100644 index 00000000..678a18a1 --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_base_patch4_window12_384/subgraph_1/graph_hash.txt @@ -0,0 +1 @@ +b30ca9883649d991bc8c4ba04da935a9b4b73a36613bcc8529ef9a0a2fb8b1d3 \ No newline at end of file diff --git a/paddle_samples/vision-model/SwinTransformer_base_patch4_window12_384/subgraph_1/graph_net.json b/paddle_samples/vision-model/SwinTransformer_base_patch4_window12_384/subgraph_1/graph_net.json new file mode 100644 index 00000000..368ac6fc --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_base_patch4_window12_384/subgraph_1/graph_net.json @@ -0,0 +1,5 @@ +{ + "framework": "paddle", + "num_devices_required": 1, + "num_nodes_required": 1 +} \ No newline at end of file diff --git a/paddle_samples/vision-model/SwinTransformer_base_patch4_window12_384/subgraph_1/input_meta.py b/paddle_samples/vision-model/SwinTransformer_base_patch4_window12_384/subgraph_1/input_meta.py new file mode 100644 index 00000000..aba1cd51 --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_base_patch4_window12_384/subgraph_1/input_meta.py @@ -0,0 +1,439 @@ +class Program_weight_tensor_data_0: + name = "data_0" + shape = [64, 3, 384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_1: + name = "data_1" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_2: + name = "data_2" + shape = [529, 4] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_3: + name = "data_3" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_4: + name = "data_4" + shape = [529, 4] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_5: + name = "data_5" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_6: + name = "data_6" + shape = [529, 8] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_7: + name = "data_7" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_8: + name = "data_8" + shape = [529, 8] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_9: + name = "data_9" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_10: + name = "data_10" + shape = [529, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_11: + name = "data_11" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_12: + name = "data_12" + shape = [529, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_13: + name = "data_13" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_14: + name = "data_14" + shape = [529, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_15: + name = "data_15" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_16: + name = "data_16" + shape = [529, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_17: + name = "data_17" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_18: + name = "data_18" + shape = [529, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_19: + name = "data_19" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_20: + name = "data_20" + shape = [529, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_21: + name = "data_21" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_22: + name = "data_22" + shape = [529, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_23: + name = "data_23" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_24: + name = "data_24" + shape = [529, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_25: + name = "data_25" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_26: + name = "data_26" + shape = [529, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_27: + name = "data_27" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_28: + name = "data_28" + shape = [529, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_29: + name = "data_29" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_30: + name = "data_30" + shape = [529, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_31: + name = "data_31" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_32: + name = "data_32" + shape = [529, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_33: + name = "data_33" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_34: + name = "data_34" + shape = [529, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_35: + name = "data_35" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_36: + name = "data_36" + shape = [529, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_37: + name = "data_37" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_38: + name = "data_38" + shape = [529, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_39: + name = "data_39" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_40: + name = "data_40" + shape = [529, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_41: + name = "data_41" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_42: + name = "data_42" + shape = [529, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_43: + name = "data_43" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_44: + name = "data_44" + shape = [529, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_45: + name = "data_45" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_46: + name = "data_46" + shape = [529, 32] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_47: + name = "data_47" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_48: + name = "data_48" + shape = [529, 32] + dtype = "float32" + low = 0 + high = 0.5 + data = None diff --git a/paddle_samples/vision-model/SwinTransformer_base_patch4_window12_384/subgraph_1/model.py b/paddle_samples/vision-model/SwinTransformer_base_patch4_window12_384/subgraph_1/model.py new file mode 100644 index 00000000..0a210212 --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_base_patch4_window12_384/subgraph_1/model.py @@ -0,0 +1,9747 @@ +import paddle + + +class GraphModule(paddle.nn.Layer): + def __init__(self): + super().__init__() + + def forward( + self, + parameter_0, + parameter_1, + parameter_2, + parameter_3, + parameter_4, + parameter_5, + parameter_6, + parameter_7, + parameter_8, + parameter_9, + parameter_10, + parameter_11, + parameter_12, + parameter_13, + parameter_14, + parameter_15, + parameter_16, + parameter_17, + parameter_18, + parameter_19, + parameter_20, + parameter_21, + parameter_22, + parameter_23, + parameter_24, + parameter_25, + parameter_26, + parameter_27, + parameter_28, + parameter_29, + parameter_30, + parameter_31, + parameter_32, + parameter_33, + parameter_34, + parameter_35, + parameter_36, + parameter_37, + parameter_38, + parameter_39, + parameter_40, + parameter_41, + parameter_42, + parameter_43, + parameter_44, + parameter_45, + parameter_46, + parameter_47, + parameter_48, + parameter_49, + parameter_50, + parameter_51, + parameter_52, + parameter_53, + parameter_54, + parameter_55, + parameter_56, + parameter_57, + parameter_58, + parameter_59, + parameter_60, + parameter_61, + parameter_62, + parameter_63, + parameter_64, + parameter_65, + parameter_66, + parameter_67, + parameter_68, + parameter_69, + parameter_70, + parameter_71, + parameter_72, + parameter_73, + parameter_74, + parameter_75, + parameter_76, + parameter_77, + parameter_78, + parameter_79, + parameter_80, + parameter_81, + parameter_82, + parameter_83, + parameter_84, + parameter_85, + parameter_86, + parameter_87, + parameter_88, + parameter_89, + parameter_90, + parameter_91, + parameter_92, + parameter_93, + parameter_94, + parameter_95, + parameter_96, + parameter_97, + parameter_98, + parameter_99, + parameter_100, + parameter_101, + parameter_102, + parameter_103, + parameter_104, + parameter_105, + parameter_106, + parameter_107, + parameter_108, + parameter_109, + parameter_110, + parameter_111, + parameter_112, + parameter_113, + parameter_114, + parameter_115, + parameter_116, + parameter_117, + parameter_118, + parameter_119, + parameter_120, + parameter_121, + parameter_122, + parameter_123, + parameter_124, + parameter_125, + parameter_126, + parameter_127, + parameter_128, + parameter_129, + parameter_130, + parameter_131, + parameter_132, + parameter_133, + parameter_134, + parameter_135, + parameter_136, + parameter_137, + parameter_138, + parameter_139, + parameter_140, + parameter_141, + parameter_142, + parameter_143, + parameter_144, + parameter_145, + parameter_146, + parameter_147, + parameter_148, + parameter_149, + parameter_150, + parameter_151, + parameter_152, + parameter_153, + parameter_154, + parameter_155, + parameter_156, + parameter_157, + parameter_158, + parameter_159, + parameter_160, + parameter_161, + parameter_162, + parameter_163, + parameter_164, + parameter_165, + parameter_166, + parameter_167, + parameter_168, + parameter_169, + parameter_170, + parameter_171, + parameter_172, + parameter_173, + parameter_174, + parameter_175, + parameter_176, + parameter_177, + parameter_178, + parameter_179, + parameter_180, + parameter_181, + parameter_182, + parameter_183, + parameter_184, + parameter_185, + parameter_186, + parameter_187, + parameter_188, + parameter_189, + parameter_190, + parameter_191, + parameter_192, + parameter_193, + parameter_194, + parameter_195, + parameter_196, + parameter_197, + parameter_198, + parameter_199, + parameter_200, + parameter_201, + parameter_202, + parameter_203, + parameter_204, + parameter_205, + parameter_206, + parameter_207, + parameter_208, + parameter_209, + parameter_210, + parameter_211, + parameter_212, + parameter_213, + parameter_214, + parameter_215, + parameter_216, + parameter_217, + parameter_218, + parameter_219, + parameter_220, + parameter_221, + parameter_222, + parameter_223, + parameter_224, + parameter_225, + parameter_226, + parameter_227, + parameter_228, + parameter_229, + parameter_230, + parameter_231, + parameter_232, + parameter_233, + parameter_234, + parameter_235, + parameter_236, + parameter_237, + parameter_238, + parameter_239, + parameter_240, + parameter_241, + parameter_242, + parameter_243, + parameter_244, + parameter_245, + parameter_246, + parameter_247, + parameter_248, + parameter_249, + parameter_250, + parameter_251, + parameter_252, + parameter_253, + parameter_254, + parameter_255, + parameter_256, + parameter_257, + parameter_258, + parameter_259, + parameter_260, + parameter_261, + parameter_262, + parameter_263, + parameter_264, + parameter_265, + parameter_266, + parameter_267, + parameter_268, + parameter_269, + parameter_270, + parameter_271, + parameter_272, + parameter_273, + parameter_274, + parameter_275, + parameter_276, + parameter_277, + parameter_278, + parameter_279, + parameter_280, + parameter_281, + parameter_282, + parameter_283, + parameter_284, + parameter_285, + parameter_286, + parameter_287, + parameter_288, + parameter_289, + parameter_290, + parameter_291, + parameter_292, + parameter_293, + parameter_294, + parameter_295, + parameter_296, + parameter_297, + parameter_298, + parameter_299, + parameter_300, + parameter_301, + parameter_302, + parameter_303, + parameter_304, + data_0, + data_1, + data_2, + data_3, + data_4, + data_5, + data_6, + data_7, + data_8, + data_9, + data_10, + data_11, + data_12, + data_13, + data_14, + data_15, + data_16, + data_17, + data_18, + data_19, + data_20, + data_21, + data_22, + data_23, + data_24, + data_25, + data_26, + data_27, + data_28, + data_29, + data_30, + data_31, + data_32, + data_33, + data_34, + data_35, + data_36, + data_37, + data_38, + data_39, + data_40, + data_41, + data_42, + data_43, + data_44, + data_45, + data_46, + data_47, + data_48, + ): + # pd_op.shape64: (4xi64) <- (-1x3x384x384xf32) + shape64_0 = paddle._C_ops.shape64(data_0) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_0 = [0] + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_1 = [1] + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_0 = paddle._C_ops.slice( + shape64_0, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_0 + + # pd_op.conv2d: (-1x128x96x96xf32) <- (-1x3x384x384xf32, 128x3x4x4xf32) + conv2d_0 = paddle._C_ops.conv2d( + data_0, parameter_304, [4, 4], [0, 0], "EXPLICIT", [1, 1], 1, "NCHW" + ) + del data_0, parameter_304 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_2 = [1, -1, 1, 1] + + # pd_op.reshape: (1x128x1x1xf32) <- (128xf32, 4xi64) + reshape_0 = paddle._C_ops.reshape(parameter_303, full_int_array_2) + del full_int_array_2, parameter_303 + + # pd_op.add: (-1x128x96x96xf32) <- (-1x128x96x96xf32, 1x128x1x1xf32) + add_1 = paddle._C_ops.add(conv2d_0, reshape_0) + del conv2d_0, reshape_0 + + # pd_op.shape64: (4xi64) <- (-1x128x96x96xf32) + shape64_1 = paddle._C_ops.shape64(add_1) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_1 = paddle._C_ops.slice( + shape64_1, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_1 + + # pd_op.flatten: (-1x128x9216xf32) <- (-1x128x96x96xf32) + flatten_0 = paddle._C_ops.flatten(add_1, 2, 3) + del add_1 + + # pd_op.transpose: (-1x9216x128xf32) <- (-1x128x9216xf32) + transpose_0 = paddle._C_ops.transpose(flatten_0, [0, 2, 1]) + del flatten_0 + + # pd_op.layer_norm: (-1x9216x128xf32, -1x9216xf32, -1x9216xf32) <- (-1x9216x128xf32, 128xf32, 128xf32) + layer_norm_0, layer_norm_1, layer_norm_2 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + transpose_0, parameter_302, parameter_301, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_301, parameter_302, transpose_0 + + # pd_op.shape64: (3xi64) <- (-1x9216x128xf32) + shape64_2 = paddle._C_ops.shape64(layer_norm_0) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_2 = paddle._C_ops.slice( + shape64_2, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_2 + + # pd_op.layer_norm: (-1x9216x128xf32, -1x9216xf32, -1x9216xf32) <- (-1x9216x128xf32, 128xf32, 128xf32) + layer_norm_3, layer_norm_4, layer_norm_5 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + layer_norm_0, parameter_300, parameter_299, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_299, parameter_300 + + # pd_op.full: (xi64) <- () + full_0 = paddle._C_ops.full( + [], float("96"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_1 = paddle._C_ops.full( + [], float("128"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_0 = [slice_2, full_0, full_0, full_1] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_0 = paddle._C_ops.stack(combine_0, 0) + del combine_0 + + # pd_op.reshape: (-1x96x96x128xf32) <- (-1x9216x128xf32, 4xi64) + reshape_1 = paddle._C_ops.reshape(layer_norm_3, stack_0) + del layer_norm_3, stack_0 + + # pd_op.shape64: (4xi64) <- (-1x96x96x128xf32) + shape64_3 = paddle._C_ops.shape64(reshape_1) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_3 = paddle._C_ops.slice( + shape64_3, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_3 + + # pd_op.full: (xi64) <- () + full_2 = paddle._C_ops.full( + [], float("8"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_3 = paddle._C_ops.full( + [], float("12"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_1 = [slice_3, full_2, full_3, full_2, full_3, full_1] + del slice_3 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_1 = paddle._C_ops.stack(combine_1, 0) + del combine_1 + + # pd_op.reshape: (-1x8x12x8x12x128xf32) <- (-1x96x96x128xf32, 6xi64) + reshape_2 = paddle._C_ops.reshape(reshape_1, stack_1) + del reshape_1, stack_1 + + # pd_op.transpose: (-1x8x8x12x12x128xf32) <- (-1x8x12x8x12x128xf32) + transpose_1 = paddle._C_ops.transpose(reshape_2, [0, 1, 3, 2, 4, 5]) + del reshape_2 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_3 = [-1, 12, 12, 128] + + # pd_op.reshape: (-1x12x12x128xf32) <- (-1x8x8x12x12x128xf32, 4xi64) + reshape_3 = paddle._C_ops.reshape(transpose_1, full_int_array_3) + del transpose_1 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_4 = [-1, 144, 128] + + # pd_op.reshape: (-1x144x128xf32) <- (-1x12x12x128xf32, 3xi64) + reshape_4 = paddle._C_ops.reshape(reshape_3, full_int_array_4) + del reshape_3 + + # pd_op.shape64: (3xi64) <- (-1x144x128xf32) + shape64_4 = paddle._C_ops.shape64(reshape_4) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_4 = paddle._C_ops.slice( + shape64_4, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_4 + + # pd_op.matmul: (-1x144x384xf32) <- (-1x144x128xf32, 128x384xf32) + matmul_0 = paddle._C_ops.matmul(reshape_4, parameter_298, False, False) + del parameter_298, reshape_4 + + # pd_op.add: (-1x144x384xf32) <- (-1x144x384xf32, 384xf32) + add_2 = paddle._C_ops.add(matmul_0, parameter_297) + del matmul_0, parameter_297 + + # pd_op.full: (xi64) <- () + full_4 = paddle._C_ops.full( + [], float("144"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_5 = paddle._C_ops.full( + [], float("3"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_6 = paddle._C_ops.full( + [], float("4"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_7 = paddle._C_ops.full( + [], float("32"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_2 = [slice_4, full_4, full_5, full_6, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_2 = paddle._C_ops.stack(combine_2, 0) + del combine_2 + + # pd_op.reshape: (-1x144x3x4x32xf32) <- (-1x144x384xf32, 5xi64) + reshape_5 = paddle._C_ops.reshape(add_2, stack_2) + del add_2, stack_2 + + # pd_op.transpose: (3x-1x4x144x32xf32) <- (-1x144x3x4x32xf32) + transpose_2 = paddle._C_ops.transpose(reshape_5, [2, 0, 3, 1, 4]) + del reshape_5 + + # pd_op.slice: (-1x4x144x32xf32) <- (3x-1x4x144x32xf32, 1xi64, 1xi64) + slice_5 = paddle._C_ops.slice( + transpose_2, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_5 = [2] + + # pd_op.slice: (-1x4x144x32xf32) <- (3x-1x4x144x32xf32, 1xi64, 1xi64) + slice_6 = paddle._C_ops.slice( + transpose_2, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_6 = [3] + + # pd_op.slice: (-1x4x144x32xf32) <- (3x-1x4x144x32xf32, 1xi64, 1xi64) + slice_7 = paddle._C_ops.slice( + transpose_2, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_2 + + # pd_op.full: (1xf32) <- () + full_8 = paddle._C_ops.full( + [1], float("0.176777"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (-1x4x144x32xf32) <- (-1x4x144x32xf32, 1xf32) + scale_0 = paddle._C_ops.scale(slice_5, full_8, float("0"), True) + del slice_5 + + # pd_op.transpose: (-1x4x32x144xf32) <- (-1x4x144x32xf32) + transpose_3 = paddle._C_ops.transpose(slice_6, [0, 1, 3, 2]) + del slice_6 + + # pd_op.matmul: (-1x4x144x144xf32) <- (-1x4x144x32xf32, -1x4x32x144xf32) + matmul_1 = paddle._C_ops.matmul(scale_0, transpose_3, False, False) + del scale_0, transpose_3 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_7 = [-1] + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_6 = paddle._C_ops.reshape(data_1, full_int_array_7) + del data_1 + + # pd_op.index_select: (20736x4xf32) <- (529x4xf32, 20736xi64) + index_select_0 = paddle._C_ops.index_select(data_2, reshape_6, 0) + del data_2, reshape_6 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_8 = [144, 144, -1] + + # pd_op.reshape: (144x144x4xf32) <- (20736x4xf32, 3xi64) + reshape_7 = paddle._C_ops.reshape(index_select_0, full_int_array_8) + del index_select_0 + + # pd_op.transpose: (4x144x144xf32) <- (144x144x4xf32) + transpose_4 = paddle._C_ops.transpose(reshape_7, [2, 0, 1]) + del reshape_7 + + # pd_op.unsqueeze: (1x4x144x144xf32) <- (4x144x144xf32, 1xi64) + unsqueeze_0 = paddle._C_ops.unsqueeze(transpose_4, full_int_array_0) + del transpose_4 + + # pd_op.add: (-1x4x144x144xf32) <- (-1x4x144x144xf32, 1x4x144x144xf32) + add_3 = paddle._C_ops.add(matmul_1, unsqueeze_0) + del matmul_1, unsqueeze_0 + + # pd_op.softmax: (-1x4x144x144xf32) <- (-1x4x144x144xf32) + softmax_0 = paddle._C_ops.softmax(add_3, -1) + del add_3 + + # pd_op.matmul: (-1x4x144x32xf32) <- (-1x4x144x144xf32, -1x4x144x32xf32) + matmul_2 = paddle._C_ops.matmul(softmax_0, slice_7, False, False) + del slice_7, softmax_0 + + # pd_op.transpose: (-1x144x4x32xf32) <- (-1x4x144x32xf32) + transpose_5 = paddle._C_ops.transpose(matmul_2, [0, 2, 1, 3]) + del matmul_2 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_3 = [slice_4, full_4, full_1] + del slice_4 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_3 = paddle._C_ops.stack(combine_3, 0) + del combine_3 + + # pd_op.reshape: (-1x144x128xf32) <- (-1x144x4x32xf32, 3xi64) + reshape_8 = paddle._C_ops.reshape(transpose_5, stack_3) + del stack_3, transpose_5 + + # pd_op.matmul: (-1x144x128xf32) <- (-1x144x128xf32, 128x128xf32) + matmul_3 = paddle._C_ops.matmul(reshape_8, parameter_296, False, False) + del parameter_296, reshape_8 + + # pd_op.add: (-1x144x128xf32) <- (-1x144x128xf32, 128xf32) + add_4 = paddle._C_ops.add(matmul_3, parameter_295) + del matmul_3, parameter_295 + + # pd_op.reshape: (-1x12x12x128xf32) <- (-1x144x128xf32, 4xi64) + reshape_9 = paddle._C_ops.reshape(add_4, full_int_array_3) + del add_4 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_9 = [-1, 8, 8, 12, 12, 128] + + # pd_op.reshape: (-1x8x8x12x12x128xf32) <- (-1x12x12x128xf32, 6xi64) + reshape_10 = paddle._C_ops.reshape(reshape_9, full_int_array_9) + del reshape_9 + + # pd_op.transpose: (-1x8x12x8x12x128xf32) <- (-1x8x8x12x12x128xf32) + transpose_6 = paddle._C_ops.transpose(reshape_10, [0, 1, 3, 2, 4, 5]) + del reshape_10 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_10 = [-1, 96, 96, 128] + + # pd_op.reshape: (-1x96x96x128xf32) <- (-1x8x12x8x12x128xf32, 4xi64) + reshape_11 = paddle._C_ops.reshape(transpose_6, full_int_array_10) + del transpose_6 + + # pd_op.full: (xi64) <- () + full_9 = paddle._C_ops.full( + [], float("9216"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_4 = [slice_2, full_9, full_1] + del slice_2 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_4 = paddle._C_ops.stack(combine_4, 0) + del combine_4 + + # pd_op.reshape: (-1x9216x128xf32) <- (-1x96x96x128xf32, 3xi64) + reshape_12 = paddle._C_ops.reshape(reshape_11, stack_4) + del reshape_11, stack_4 + + # pd_op.add: (-1x9216x128xf32) <- (-1x9216x128xf32, -1x9216x128xf32) + add_5 = paddle._C_ops.add(layer_norm_0, reshape_12) + del layer_norm_0, reshape_12 + + # pd_op.layer_norm: (-1x9216x128xf32, -1x9216xf32, -1x9216xf32) <- (-1x9216x128xf32, 128xf32, 128xf32) + layer_norm_6, layer_norm_7, layer_norm_8 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_5, parameter_294, parameter_293, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_293, parameter_294 + + # pd_op.matmul: (-1x9216x512xf32) <- (-1x9216x128xf32, 128x512xf32) + matmul_4 = paddle._C_ops.matmul(layer_norm_6, parameter_292, False, False) + del layer_norm_6, parameter_292 + + # pd_op.add: (-1x9216x512xf32) <- (-1x9216x512xf32, 512xf32) + add_6 = paddle._C_ops.add(matmul_4, parameter_291) + del matmul_4, parameter_291 + + # pd_op.gelu: (-1x9216x512xf32) <- (-1x9216x512xf32) + gelu_0 = paddle._C_ops.gelu(add_6, False) + del add_6 + + # pd_op.matmul: (-1x9216x128xf32) <- (-1x9216x512xf32, 512x128xf32) + matmul_5 = paddle._C_ops.matmul(gelu_0, parameter_290, False, False) + del gelu_0, parameter_290 + + # pd_op.add: (-1x9216x128xf32) <- (-1x9216x128xf32, 128xf32) + add_7 = paddle._C_ops.add(matmul_5, parameter_289) + del matmul_5, parameter_289 + + # pd_op.add: (-1x9216x128xf32) <- (-1x9216x128xf32, -1x9216x128xf32) + add_8 = paddle._C_ops.add(add_5, add_7) + del add_5, add_7 + + # pd_op.shape64: (3xi64) <- (-1x9216x128xf32) + shape64_5 = paddle._C_ops.shape64(add_8) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_8 = paddle._C_ops.slice( + shape64_5, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_5 + + # pd_op.layer_norm: (-1x9216x128xf32, -1x9216xf32, -1x9216xf32) <- (-1x9216x128xf32, 128xf32, 128xf32) + layer_norm_9, layer_norm_10, layer_norm_11 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_8, parameter_288, parameter_287, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_287, parameter_288 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_5 = [slice_8, full_0, full_0, full_1] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_5 = paddle._C_ops.stack(combine_5, 0) + del combine_5 + + # pd_op.reshape: (-1x96x96x128xf32) <- (-1x9216x128xf32, 4xi64) + reshape_13 = paddle._C_ops.reshape(layer_norm_9, stack_5) + del layer_norm_9, stack_5 + + # pd_op.shape64: (4xi64) <- (-1x96x96x128xf32) + shape64_6 = paddle._C_ops.shape64(reshape_13) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_9 = paddle._C_ops.slice( + shape64_6, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_6 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_11 = [-6, -6] + + # pd_op.roll: (-1x96x96x128xf32) <- (-1x96x96x128xf32, 2xi64) + roll_0 = paddle._C_ops.roll(reshape_13, full_int_array_11, [1, 2]) + del reshape_13 + + # pd_op.shape64: (4xi64) <- (-1x96x96x128xf32) + shape64_7 = paddle._C_ops.shape64(roll_0) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_10 = paddle._C_ops.slice( + shape64_7, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_7 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_6 = [slice_10, full_2, full_3, full_2, full_3, full_1] + del slice_10 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_6 = paddle._C_ops.stack(combine_6, 0) + del combine_6 + + # pd_op.reshape: (-1x8x12x8x12x128xf32) <- (-1x96x96x128xf32, 6xi64) + reshape_14 = paddle._C_ops.reshape(roll_0, stack_6) + del roll_0, stack_6 + + # pd_op.transpose: (-1x8x8x12x12x128xf32) <- (-1x8x12x8x12x128xf32) + transpose_7 = paddle._C_ops.transpose(reshape_14, [0, 1, 3, 2, 4, 5]) + del reshape_14 + + # pd_op.reshape: (-1x12x12x128xf32) <- (-1x8x8x12x12x128xf32, 4xi64) + reshape_15 = paddle._C_ops.reshape(transpose_7, full_int_array_3) + del transpose_7 + + # pd_op.reshape: (-1x144x128xf32) <- (-1x12x12x128xf32, 3xi64) + reshape_16 = paddle._C_ops.reshape(reshape_15, full_int_array_4) + del full_int_array_4, reshape_15 + + # pd_op.full: (1x96x96x1xf32) <- () + full_10 = paddle._C_ops.full( + [1, 96, 96, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_12 = [0, 0] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_13 = [-12, -12] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_14 = [1, 1] + + # pd_op.set_value_: (1x96x96x1xf32) <- (1x96x96x1xf32, 2xi64, 2xi64, 2xi64) + set_value__12 = paddle._C_ops.set_value_( + full_10, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_10 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_15 = [0, -12] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_16 = [-12, -6] + + # pd_op.set_value_: (1x96x96x1xf32) <- (1x96x96x1xf32, 2xi64, 2xi64, 2xi64) + set_value__13 = paddle._C_ops.set_value_( + set_value__12, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__12 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_17 = [0, -6] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_18 = [-12, 2147483647] + + # pd_op.set_value_: (1x96x96x1xf32) <- (1x96x96x1xf32, 2xi64, 2xi64, 2xi64) + set_value__14 = paddle._C_ops.set_value_( + set_value__13, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__13 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_19 = [-12, 0] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_20 = [-6, -12] + + # pd_op.set_value_: (1x96x96x1xf32) <- (1x96x96x1xf32, 2xi64, 2xi64, 2xi64) + set_value__15 = paddle._C_ops.set_value_( + set_value__14, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__14 + + # pd_op.set_value_: (1x96x96x1xf32) <- (1x96x96x1xf32, 2xi64, 2xi64, 2xi64) + set_value__16 = paddle._C_ops.set_value_( + set_value__15, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__15 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_21 = [-6, 2147483647] + + # pd_op.set_value_: (1x96x96x1xf32) <- (1x96x96x1xf32, 2xi64, 2xi64, 2xi64) + set_value__17 = paddle._C_ops.set_value_( + set_value__16, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__16 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_22 = [-6, 0] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_23 = [2147483647, -12] + + # pd_op.set_value_: (1x96x96x1xf32) <- (1x96x96x1xf32, 2xi64, 2xi64, 2xi64) + set_value__18 = paddle._C_ops.set_value_( + set_value__17, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__17 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_24 = [2147483647, -6] + + # pd_op.set_value_: (1x96x96x1xf32) <- (1x96x96x1xf32, 2xi64, 2xi64, 2xi64) + set_value__19 = paddle._C_ops.set_value_( + set_value__18, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__18 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_25 = [2147483647, 2147483647] + + # pd_op.set_value_: (1x96x96x1xf32) <- (1x96x96x1xf32, 2xi64, 2xi64, 2xi64) + set_value__0 = paddle._C_ops.set_value_( + set_value__19, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__19 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_26 = [1, 8, 12, 8, 12, 1] + + # pd_op.reshape: (1x8x12x8x12x1xf32) <- (1x96x96x1xf32, 6xi64) + reshape_17 = paddle._C_ops.reshape(set_value__0, full_int_array_26) + del full_int_array_26 + + # pd_op.transpose: (1x8x8x12x12x1xf32) <- (1x8x12x8x12x1xf32) + transpose_8 = paddle._C_ops.transpose(reshape_17, [0, 1, 3, 2, 4, 5]) + del reshape_17 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_27 = [-1, 12, 12, 1] + + # pd_op.reshape: (64x12x12x1xf32) <- (1x8x8x12x12x1xf32, 4xi64) + reshape_18 = paddle._C_ops.reshape(transpose_8, full_int_array_27) + del transpose_8 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_28 = [-1, 144] + + # pd_op.reshape: (64x144xf32) <- (64x12x12x1xf32, 2xi64) + reshape_19 = paddle._C_ops.reshape(reshape_18, full_int_array_28) + del reshape_18 + + # pd_op.unsqueeze: (64x1x144xf32) <- (64x144xf32, 1xi64) + unsqueeze_1 = paddle._C_ops.unsqueeze(reshape_19, full_int_array_1) + + # pd_op.unsqueeze: (64x144x1xf32) <- (64x144xf32, 1xi64) + unsqueeze_2 = paddle._C_ops.unsqueeze(reshape_19, full_int_array_5) + del reshape_19 + + # pd_op.subtract: (64x144x144xf32) <- (64x1x144xf32, 64x144x1xf32) + subtract_0 = paddle._C_ops.subtract(unsqueeze_1, unsqueeze_2) + del unsqueeze_1, unsqueeze_2 + + # pd_op.full: (xf32) <- () + full_11 = paddle._C_ops.full( + [], float("0"), paddle.float32, paddle.framework._current_expected_place() + ) + + # pd_op.not_equal: (64x144x144xb) <- (64x144x144xf32, xf32) + not_equal_0 = paddle._C_ops.not_equal(subtract_0, full_11) + + # pd_op.full: (64x144x144xf32) <- () + full_12 = paddle._C_ops.full( + [64, 144, 144], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (64x144x144xf32) <- (64x144x144xb, 64x144x144xf32, 64x144x144xf32) + where_0 = paddle._C_ops.where(not_equal_0, full_12, subtract_0) + del full_12, not_equal_0, subtract_0 + + # pd_op.equal: (64x144x144xb) <- (64x144x144xf32, xf32) + equal_0 = paddle._C_ops.equal(where_0, full_11) + + # pd_op.full: (64x144x144xf32) <- () + full_13 = paddle._C_ops.full( + [64, 144, 144], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (64x144x144xf32) <- (64x144x144xb, 64x144x144xf32, 64x144x144xf32) + where_1 = paddle._C_ops.where(equal_0, full_13, where_0) + del equal_0, full_13, where_0 + + # pd_op.shape64: (3xi64) <- (-1x144x128xf32) + shape64_8 = paddle._C_ops.shape64(reshape_16) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_11 = paddle._C_ops.slice( + shape64_8, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_8 + + # pd_op.matmul: (-1x144x384xf32) <- (-1x144x128xf32, 128x384xf32) + matmul_6 = paddle._C_ops.matmul(reshape_16, parameter_286, False, False) + del parameter_286, reshape_16 + + # pd_op.add: (-1x144x384xf32) <- (-1x144x384xf32, 384xf32) + add_9 = paddle._C_ops.add(matmul_6, parameter_285) + del matmul_6, parameter_285 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_7 = [slice_11, full_4, full_5, full_6, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_7 = paddle._C_ops.stack(combine_7, 0) + del combine_7 + + # pd_op.reshape: (-1x144x3x4x32xf32) <- (-1x144x384xf32, 5xi64) + reshape_20 = paddle._C_ops.reshape(add_9, stack_7) + del add_9, stack_7 + + # pd_op.transpose: (3x-1x4x144x32xf32) <- (-1x144x3x4x32xf32) + transpose_9 = paddle._C_ops.transpose(reshape_20, [2, 0, 3, 1, 4]) + del reshape_20 + + # pd_op.slice: (-1x4x144x32xf32) <- (3x-1x4x144x32xf32, 1xi64, 1xi64) + slice_12 = paddle._C_ops.slice( + transpose_9, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x4x144x32xf32) <- (3x-1x4x144x32xf32, 1xi64, 1xi64) + slice_13 = paddle._C_ops.slice( + transpose_9, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x4x144x32xf32) <- (3x-1x4x144x32xf32, 1xi64, 1xi64) + slice_14 = paddle._C_ops.slice( + transpose_9, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_9 + + # pd_op.scale: (-1x4x144x32xf32) <- (-1x4x144x32xf32, 1xf32) + scale_1 = paddle._C_ops.scale(slice_12, full_8, float("0"), True) + del slice_12 + + # pd_op.transpose: (-1x4x32x144xf32) <- (-1x4x144x32xf32) + transpose_10 = paddle._C_ops.transpose(slice_13, [0, 1, 3, 2]) + del slice_13 + + # pd_op.matmul: (-1x4x144x144xf32) <- (-1x4x144x32xf32, -1x4x32x144xf32) + matmul_7 = paddle._C_ops.matmul(scale_1, transpose_10, False, False) + del scale_1, transpose_10 + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_21 = paddle._C_ops.reshape(data_3, full_int_array_7) + del data_3 + + # pd_op.index_select: (20736x4xf32) <- (529x4xf32, 20736xi64) + index_select_1 = paddle._C_ops.index_select(data_4, reshape_21, 0) + del data_4, reshape_21 + + # pd_op.reshape: (144x144x4xf32) <- (20736x4xf32, 3xi64) + reshape_22 = paddle._C_ops.reshape(index_select_1, full_int_array_8) + del index_select_1 + + # pd_op.transpose: (4x144x144xf32) <- (144x144x4xf32) + transpose_11 = paddle._C_ops.transpose(reshape_22, [2, 0, 1]) + del reshape_22 + + # pd_op.unsqueeze: (1x4x144x144xf32) <- (4x144x144xf32, 1xi64) + unsqueeze_3 = paddle._C_ops.unsqueeze(transpose_11, full_int_array_0) + del transpose_11 + + # pd_op.add: (-1x4x144x144xf32) <- (-1x4x144x144xf32, 1x4x144x144xf32) + add_10 = paddle._C_ops.add(matmul_7, unsqueeze_3) + del matmul_7, unsqueeze_3 + + # pd_op.full: (xi64) <- () + full_14 = paddle._C_ops.full( + [], float("64"), paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_0 = paddle._C_ops.floor_divide(slice_11, full_14) + del full_14 + + # pd_op.full: (xi64) <- () + full_15 = paddle._C_ops.full( + [], float("64"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_8 = [floor_divide_0, full_15, full_6, full_4, full_4] + del floor_divide_0, full_15 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_8 = paddle._C_ops.stack(combine_8, 0) + del combine_8 + + # pd_op.reshape: (-1x64x4x144x144xf32) <- (-1x4x144x144xf32, 5xi64) + reshape_23 = paddle._C_ops.reshape(add_10, stack_8) + del add_10, stack_8 + + # pd_op.unsqueeze: (64x1x144x144xf32) <- (64x144x144xf32, 1xi64) + unsqueeze_4 = paddle._C_ops.unsqueeze(where_1, full_int_array_1) + del where_1 + + # pd_op.unsqueeze: (1x64x1x144x144xf32) <- (64x1x144x144xf32, 1xi64) + unsqueeze_5 = paddle._C_ops.unsqueeze(unsqueeze_4, full_int_array_0) + del unsqueeze_4 + + # pd_op.add: (-1x64x4x144x144xf32) <- (-1x64x4x144x144xf32, 1x64x1x144x144xf32) + add_11 = paddle._C_ops.add(reshape_23, unsqueeze_5) + del reshape_23, unsqueeze_5 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_9 = [slice_11, full_6, full_4, full_4] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_9 = paddle._C_ops.stack(combine_9, 0) + del combine_9 + + # pd_op.reshape: (-1x4x144x144xf32) <- (-1x64x4x144x144xf32, 4xi64) + reshape_24 = paddle._C_ops.reshape(add_11, stack_9) + del add_11, stack_9 + + # pd_op.softmax: (-1x4x144x144xf32) <- (-1x4x144x144xf32) + softmax_1 = paddle._C_ops.softmax(reshape_24, -1) + del reshape_24 + + # pd_op.matmul: (-1x4x144x32xf32) <- (-1x4x144x144xf32, -1x4x144x32xf32) + matmul_8 = paddle._C_ops.matmul(softmax_1, slice_14, False, False) + del slice_14, softmax_1 + + # pd_op.transpose: (-1x144x4x32xf32) <- (-1x4x144x32xf32) + transpose_12 = paddle._C_ops.transpose(matmul_8, [0, 2, 1, 3]) + del matmul_8 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_10 = [slice_11, full_4, full_1] + del slice_11 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_10 = paddle._C_ops.stack(combine_10, 0) + del combine_10 + + # pd_op.reshape: (-1x144x128xf32) <- (-1x144x4x32xf32, 3xi64) + reshape_25 = paddle._C_ops.reshape(transpose_12, stack_10) + del stack_10, transpose_12 + + # pd_op.matmul: (-1x144x128xf32) <- (-1x144x128xf32, 128x128xf32) + matmul_9 = paddle._C_ops.matmul(reshape_25, parameter_284, False, False) + del parameter_284, reshape_25 + + # pd_op.add: (-1x144x128xf32) <- (-1x144x128xf32, 128xf32) + add_12 = paddle._C_ops.add(matmul_9, parameter_283) + del matmul_9, parameter_283 + + # pd_op.reshape: (-1x12x12x128xf32) <- (-1x144x128xf32, 4xi64) + reshape_26 = paddle._C_ops.reshape(add_12, full_int_array_3) + del add_12, full_int_array_3 + + # pd_op.reshape: (-1x8x8x12x12x128xf32) <- (-1x12x12x128xf32, 6xi64) + reshape_27 = paddle._C_ops.reshape(reshape_26, full_int_array_9) + del full_int_array_9, reshape_26 + + # pd_op.transpose: (-1x8x12x8x12x128xf32) <- (-1x8x8x12x12x128xf32) + transpose_13 = paddle._C_ops.transpose(reshape_27, [0, 1, 3, 2, 4, 5]) + del reshape_27 + + # pd_op.reshape: (-1x96x96x128xf32) <- (-1x8x12x8x12x128xf32, 4xi64) + reshape_28 = paddle._C_ops.reshape(transpose_13, full_int_array_10) + del full_int_array_10, transpose_13 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_29 = [6, 6] + + # pd_op.roll: (-1x96x96x128xf32) <- (-1x96x96x128xf32, 2xi64) + roll_1 = paddle._C_ops.roll(reshape_28, full_int_array_29, [1, 2]) + del reshape_28 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_11 = [slice_8, full_9, full_1] + del full_9, slice_8 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_11 = paddle._C_ops.stack(combine_11, 0) + del combine_11 + + # pd_op.reshape: (-1x9216x128xf32) <- (-1x96x96x128xf32, 3xi64) + reshape_29 = paddle._C_ops.reshape(roll_1, stack_11) + del roll_1, stack_11 + + # pd_op.add: (-1x9216x128xf32) <- (-1x9216x128xf32, -1x9216x128xf32) + add_13 = paddle._C_ops.add(add_8, reshape_29) + del add_8, reshape_29 + + # pd_op.layer_norm: (-1x9216x128xf32, -1x9216xf32, -1x9216xf32) <- (-1x9216x128xf32, 128xf32, 128xf32) + layer_norm_12, layer_norm_13, layer_norm_14 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_13, parameter_282, parameter_281, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_281, parameter_282 + + # pd_op.matmul: (-1x9216x512xf32) <- (-1x9216x128xf32, 128x512xf32) + matmul_10 = paddle._C_ops.matmul(layer_norm_12, parameter_280, False, False) + del layer_norm_12, parameter_280 + + # pd_op.add: (-1x9216x512xf32) <- (-1x9216x512xf32, 512xf32) + add_14 = paddle._C_ops.add(matmul_10, parameter_279) + del matmul_10, parameter_279 + + # pd_op.gelu: (-1x9216x512xf32) <- (-1x9216x512xf32) + gelu_1 = paddle._C_ops.gelu(add_14, False) + del add_14 + + # pd_op.matmul: (-1x9216x128xf32) <- (-1x9216x512xf32, 512x128xf32) + matmul_11 = paddle._C_ops.matmul(gelu_1, parameter_278, False, False) + del gelu_1, parameter_278 + + # pd_op.add: (-1x9216x128xf32) <- (-1x9216x128xf32, 128xf32) + add_15 = paddle._C_ops.add(matmul_11, parameter_277) + del matmul_11, parameter_277 + + # pd_op.add: (-1x9216x128xf32) <- (-1x9216x128xf32, -1x9216x128xf32) + add_16 = paddle._C_ops.add(add_13, add_15) + del add_13, add_15 + + # pd_op.shape64: (3xi64) <- (-1x9216x128xf32) + shape64_9 = paddle._C_ops.shape64(add_16) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_15 = paddle._C_ops.slice( + shape64_9, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_9 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_12 = [slice_15, full_0, full_0, full_1] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_12 = paddle._C_ops.stack(combine_12, 0) + del combine_12 + + # pd_op.reshape: (-1x96x96x128xf32) <- (-1x9216x128xf32, 4xi64) + reshape_30 = paddle._C_ops.reshape(add_16, stack_12) + del add_16, stack_12 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_30 = [2, 2] + + # pd_op.strided_slice: (-1x48x48x128xf32) <- (-1x96x96x128xf32, 2xi64, 2xi64, 2xi64) + strided_slice_0 = paddle._C_ops.strided_slice( + reshape_30, [1, 2], full_int_array_12, full_int_array_25, full_int_array_30 + ) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_31 = [1, 0] + + # pd_op.strided_slice: (-1x48x48x128xf32) <- (-1x96x96x128xf32, 2xi64, 2xi64, 2xi64) + strided_slice_1 = paddle._C_ops.strided_slice( + reshape_30, [1, 2], full_int_array_31, full_int_array_25, full_int_array_30 + ) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_32 = [0, 1] + + # pd_op.strided_slice: (-1x48x48x128xf32) <- (-1x96x96x128xf32, 2xi64, 2xi64, 2xi64) + strided_slice_2 = paddle._C_ops.strided_slice( + reshape_30, [1, 2], full_int_array_32, full_int_array_25, full_int_array_30 + ) + + # pd_op.strided_slice: (-1x48x48x128xf32) <- (-1x96x96x128xf32, 2xi64, 2xi64, 2xi64) + strided_slice_3 = paddle._C_ops.strided_slice( + reshape_30, [1, 2], full_int_array_14, full_int_array_25, full_int_array_30 + ) + + # pd_op.shape64: (4xi64) <- (-1x96x96x128xf32) + shape64_10 = paddle._C_ops.shape64(reshape_30) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_16 = paddle._C_ops.slice( + shape64_10, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_10 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_13 = [slice_16, full_0, full_0, full_1] + del full_0, full_1, slice_16 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_13 = paddle._C_ops.stack(combine_13, 0) + del combine_13 + + # pd_op.reshape: (-1x96x96x128xf32) <- (-1x96x96x128xf32, 4xi64) + reshape_31 = paddle._C_ops.reshape(reshape_30, stack_13) + del reshape_30, stack_13 + + # pd_op.full: (1xi32) <- () + full_16 = paddle._C_ops.full( + [1], float("-1"), paddle.int32, paddle.core.CPUPlace() + ) + + # builtin.combine: ([-1x48x48x128xf32, -1x48x48x128xf32, -1x48x48x128xf32, -1x48x48x128xf32]) <- (-1x48x48x128xf32, -1x48x48x128xf32, -1x48x48x128xf32, -1x48x48x128xf32) + combine_14 = [ + strided_slice_0, + strided_slice_1, + strided_slice_2, + strided_slice_3, + ] + del strided_slice_0, strided_slice_1, strided_slice_2, strided_slice_3 + + # pd_op.concat: (-1x48x48x512xf32) <- ([-1x48x48x128xf32, -1x48x48x128xf32, -1x48x48x128xf32, -1x48x48x128xf32], 1xi32) + concat_0 = paddle._C_ops.concat(combine_14, full_16) + del combine_14 + + # pd_op.full: (xi64) <- () + full_17 = paddle._C_ops.full( + [], float("-1"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_18 = paddle._C_ops.full( + [], float("512"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_15 = [slice_15, full_17, full_18] + del slice_15 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_14 = paddle._C_ops.stack(combine_15, 0) + del combine_15 + + # pd_op.reshape: (-1x-1x512xf32) <- (-1x48x48x512xf32, 3xi64) + reshape_32 = paddle._C_ops.reshape(concat_0, stack_14) + del concat_0, stack_14 + + # pd_op.layer_norm: (-1x-1x512xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x512xf32, 512xf32, 512xf32) + layer_norm_15, layer_norm_16, layer_norm_17 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + reshape_32, parameter_276, parameter_275, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_275, parameter_276, reshape_32 + + # pd_op.matmul: (-1x-1x256xf32) <- (-1x-1x512xf32, 512x256xf32) + matmul_12 = paddle._C_ops.matmul(layer_norm_15, parameter_274, False, False) + del layer_norm_15, parameter_274 + + # pd_op.shape64: (3xi64) <- (-1x-1x256xf32) + shape64_11 = paddle._C_ops.shape64(matmul_12) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_17 = paddle._C_ops.slice( + shape64_11, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_11 + + # pd_op.shape64: (3xi64) <- (-1x-1x256xf32) + shape64_12 = paddle._C_ops.shape64(matmul_12) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_18 = paddle._C_ops.slice( + shape64_12, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + del shape64_12 + + # pd_op.layer_norm: (-1x-1x256xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x256xf32, 256xf32, 256xf32) + layer_norm_18, layer_norm_19, layer_norm_20 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + matmul_12, parameter_273, parameter_272, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_272, parameter_273 + + # pd_op.full: (xi64) <- () + full_19 = paddle._C_ops.full( + [], float("48"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_20 = paddle._C_ops.full( + [], float("256"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_16 = [slice_17, full_19, full_19, full_20] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_15 = paddle._C_ops.stack(combine_16, 0) + del combine_16 + + # pd_op.reshape: (-1x48x48x256xf32) <- (-1x-1x256xf32, 4xi64) + reshape_33 = paddle._C_ops.reshape(layer_norm_18, stack_15) + del layer_norm_18, stack_15 + + # pd_op.shape64: (4xi64) <- (-1x48x48x256xf32) + shape64_13 = paddle._C_ops.shape64(reshape_33) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_19 = paddle._C_ops.slice( + shape64_13, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_13 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_17 = [slice_19, full_6, full_3, full_6, full_3, full_20] + del slice_19 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_16 = paddle._C_ops.stack(combine_17, 0) + del combine_17 + + # pd_op.reshape: (-1x4x12x4x12x256xf32) <- (-1x48x48x256xf32, 6xi64) + reshape_34 = paddle._C_ops.reshape(reshape_33, stack_16) + del reshape_33, stack_16 + + # pd_op.transpose: (-1x4x4x12x12x256xf32) <- (-1x4x12x4x12x256xf32) + transpose_14 = paddle._C_ops.transpose(reshape_34, [0, 1, 3, 2, 4, 5]) + del reshape_34 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_33 = [-1, 12, 12, 256] + + # pd_op.reshape: (-1x12x12x256xf32) <- (-1x4x4x12x12x256xf32, 4xi64) + reshape_35 = paddle._C_ops.reshape(transpose_14, full_int_array_33) + del transpose_14 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_34 = [-1, 144, 256] + + # pd_op.reshape: (-1x144x256xf32) <- (-1x12x12x256xf32, 3xi64) + reshape_36 = paddle._C_ops.reshape(reshape_35, full_int_array_34) + del reshape_35 + + # pd_op.shape64: (3xi64) <- (-1x144x256xf32) + shape64_14 = paddle._C_ops.shape64(reshape_36) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_20 = paddle._C_ops.slice( + shape64_14, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_14 + + # pd_op.matmul: (-1x144x768xf32) <- (-1x144x256xf32, 256x768xf32) + matmul_13 = paddle._C_ops.matmul(reshape_36, parameter_271, False, False) + del parameter_271, reshape_36 + + # pd_op.add: (-1x144x768xf32) <- (-1x144x768xf32, 768xf32) + add_17 = paddle._C_ops.add(matmul_13, parameter_270) + del matmul_13, parameter_270 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_18 = [slice_20, full_4, full_5, full_2, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_17 = paddle._C_ops.stack(combine_18, 0) + del combine_18 + + # pd_op.reshape: (-1x144x3x8x32xf32) <- (-1x144x768xf32, 5xi64) + reshape_37 = paddle._C_ops.reshape(add_17, stack_17) + del add_17, stack_17 + + # pd_op.transpose: (3x-1x8x144x32xf32) <- (-1x144x3x8x32xf32) + transpose_15 = paddle._C_ops.transpose(reshape_37, [2, 0, 3, 1, 4]) + del reshape_37 + + # pd_op.slice: (-1x8x144x32xf32) <- (3x-1x8x144x32xf32, 1xi64, 1xi64) + slice_21 = paddle._C_ops.slice( + transpose_15, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x8x144x32xf32) <- (3x-1x8x144x32xf32, 1xi64, 1xi64) + slice_22 = paddle._C_ops.slice( + transpose_15, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x8x144x32xf32) <- (3x-1x8x144x32xf32, 1xi64, 1xi64) + slice_23 = paddle._C_ops.slice( + transpose_15, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_15 + + # pd_op.scale: (-1x8x144x32xf32) <- (-1x8x144x32xf32, 1xf32) + scale_2 = paddle._C_ops.scale(slice_21, full_8, float("0"), True) + del slice_21 + + # pd_op.transpose: (-1x8x32x144xf32) <- (-1x8x144x32xf32) + transpose_16 = paddle._C_ops.transpose(slice_22, [0, 1, 3, 2]) + del slice_22 + + # pd_op.matmul: (-1x8x144x144xf32) <- (-1x8x144x32xf32, -1x8x32x144xf32) + matmul_14 = paddle._C_ops.matmul(scale_2, transpose_16, False, False) + del scale_2, transpose_16 + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_38 = paddle._C_ops.reshape(data_5, full_int_array_7) + del data_5 + + # pd_op.index_select: (20736x8xf32) <- (529x8xf32, 20736xi64) + index_select_2 = paddle._C_ops.index_select(data_6, reshape_38, 0) + del data_6, reshape_38 + + # pd_op.reshape: (144x144x8xf32) <- (20736x8xf32, 3xi64) + reshape_39 = paddle._C_ops.reshape(index_select_2, full_int_array_8) + del index_select_2 + + # pd_op.transpose: (8x144x144xf32) <- (144x144x8xf32) + transpose_17 = paddle._C_ops.transpose(reshape_39, [2, 0, 1]) + del reshape_39 + + # pd_op.unsqueeze: (1x8x144x144xf32) <- (8x144x144xf32, 1xi64) + unsqueeze_6 = paddle._C_ops.unsqueeze(transpose_17, full_int_array_0) + del transpose_17 + + # pd_op.add: (-1x8x144x144xf32) <- (-1x8x144x144xf32, 1x8x144x144xf32) + add_18 = paddle._C_ops.add(matmul_14, unsqueeze_6) + del matmul_14, unsqueeze_6 + + # pd_op.softmax: (-1x8x144x144xf32) <- (-1x8x144x144xf32) + softmax_2 = paddle._C_ops.softmax(add_18, -1) + del add_18 + + # pd_op.matmul: (-1x8x144x32xf32) <- (-1x8x144x144xf32, -1x8x144x32xf32) + matmul_15 = paddle._C_ops.matmul(softmax_2, slice_23, False, False) + del slice_23, softmax_2 + + # pd_op.transpose: (-1x144x8x32xf32) <- (-1x8x144x32xf32) + transpose_18 = paddle._C_ops.transpose(matmul_15, [0, 2, 1, 3]) + del matmul_15 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_19 = [slice_20, full_4, full_20] + del slice_20 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_18 = paddle._C_ops.stack(combine_19, 0) + del combine_19 + + # pd_op.reshape: (-1x144x256xf32) <- (-1x144x8x32xf32, 3xi64) + reshape_40 = paddle._C_ops.reshape(transpose_18, stack_18) + del stack_18, transpose_18 + + # pd_op.matmul: (-1x144x256xf32) <- (-1x144x256xf32, 256x256xf32) + matmul_16 = paddle._C_ops.matmul(reshape_40, parameter_269, False, False) + del parameter_269, reshape_40 + + # pd_op.add: (-1x144x256xf32) <- (-1x144x256xf32, 256xf32) + add_19 = paddle._C_ops.add(matmul_16, parameter_268) + del matmul_16, parameter_268 + + # pd_op.reshape: (-1x12x12x256xf32) <- (-1x144x256xf32, 4xi64) + reshape_41 = paddle._C_ops.reshape(add_19, full_int_array_33) + del add_19 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_35 = [-1, 4, 4, 12, 12, 256] + + # pd_op.reshape: (-1x4x4x12x12x256xf32) <- (-1x12x12x256xf32, 6xi64) + reshape_42 = paddle._C_ops.reshape(reshape_41, full_int_array_35) + del reshape_41 + + # pd_op.transpose: (-1x4x12x4x12x256xf32) <- (-1x4x4x12x12x256xf32) + transpose_19 = paddle._C_ops.transpose(reshape_42, [0, 1, 3, 2, 4, 5]) + del reshape_42 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_36 = [-1, 48, 48, 256] + + # pd_op.reshape: (-1x48x48x256xf32) <- (-1x4x12x4x12x256xf32, 4xi64) + reshape_43 = paddle._C_ops.reshape(transpose_19, full_int_array_36) + del transpose_19 + + # pd_op.full: (xi64) <- () + full_21 = paddle._C_ops.full( + [], float("2304"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_20 = [slice_17, full_21, full_20] + del slice_17 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_19 = paddle._C_ops.stack(combine_20, 0) + del combine_20 + + # pd_op.reshape: (-1x2304x256xf32) <- (-1x48x48x256xf32, 3xi64) + reshape_44 = paddle._C_ops.reshape(reshape_43, stack_19) + del reshape_43, stack_19 + + # pd_op.add: (-1x2304x256xf32) <- (-1x-1x256xf32, -1x2304x256xf32) + add_20 = paddle._C_ops.add(matmul_12, reshape_44) + del matmul_12, reshape_44 + + # pd_op.layer_norm: (-1x2304x256xf32, -1x2304xf32, -1x2304xf32) <- (-1x2304x256xf32, 256xf32, 256xf32) + layer_norm_21, layer_norm_22, layer_norm_23 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_20, parameter_267, parameter_266, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_266, parameter_267 + + # pd_op.matmul: (-1x2304x1024xf32) <- (-1x2304x256xf32, 256x1024xf32) + matmul_17 = paddle._C_ops.matmul(layer_norm_21, parameter_265, False, False) + del layer_norm_21, parameter_265 + + # pd_op.add: (-1x2304x1024xf32) <- (-1x2304x1024xf32, 1024xf32) + add_21 = paddle._C_ops.add(matmul_17, parameter_264) + del matmul_17, parameter_264 + + # pd_op.gelu: (-1x2304x1024xf32) <- (-1x2304x1024xf32) + gelu_2 = paddle._C_ops.gelu(add_21, False) + del add_21 + + # pd_op.matmul: (-1x2304x256xf32) <- (-1x2304x1024xf32, 1024x256xf32) + matmul_18 = paddle._C_ops.matmul(gelu_2, parameter_263, False, False) + del gelu_2, parameter_263 + + # pd_op.add: (-1x2304x256xf32) <- (-1x2304x256xf32, 256xf32) + add_22 = paddle._C_ops.add(matmul_18, parameter_262) + del matmul_18, parameter_262 + + # pd_op.add: (-1x2304x256xf32) <- (-1x2304x256xf32, -1x2304x256xf32) + add_23 = paddle._C_ops.add(add_20, add_22) + del add_20, add_22 + + # pd_op.shape64: (3xi64) <- (-1x2304x256xf32) + shape64_15 = paddle._C_ops.shape64(add_23) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_24 = paddle._C_ops.slice( + shape64_15, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_15 + + # pd_op.layer_norm: (-1x2304x256xf32, -1x2304xf32, -1x2304xf32) <- (-1x2304x256xf32, 256xf32, 256xf32) + layer_norm_24, layer_norm_25, layer_norm_26 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_23, parameter_261, parameter_260, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_260, parameter_261 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_21 = [slice_24, full_19, full_19, full_20] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_20 = paddle._C_ops.stack(combine_21, 0) + del combine_21 + + # pd_op.reshape: (-1x48x48x256xf32) <- (-1x2304x256xf32, 4xi64) + reshape_45 = paddle._C_ops.reshape(layer_norm_24, stack_20) + del layer_norm_24, stack_20 + + # pd_op.shape64: (4xi64) <- (-1x48x48x256xf32) + shape64_16 = paddle._C_ops.shape64(reshape_45) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_25 = paddle._C_ops.slice( + shape64_16, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_16 + + # pd_op.roll: (-1x48x48x256xf32) <- (-1x48x48x256xf32, 2xi64) + roll_2 = paddle._C_ops.roll(reshape_45, full_int_array_11, [1, 2]) + del reshape_45 + + # pd_op.shape64: (4xi64) <- (-1x48x48x256xf32) + shape64_17 = paddle._C_ops.shape64(roll_2) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_26 = paddle._C_ops.slice( + shape64_17, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_17 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_22 = [slice_26, full_6, full_3, full_6, full_3, full_20] + del slice_26 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_21 = paddle._C_ops.stack(combine_22, 0) + del combine_22 + + # pd_op.reshape: (-1x4x12x4x12x256xf32) <- (-1x48x48x256xf32, 6xi64) + reshape_46 = paddle._C_ops.reshape(roll_2, stack_21) + del roll_2, stack_21 + + # pd_op.transpose: (-1x4x4x12x12x256xf32) <- (-1x4x12x4x12x256xf32) + transpose_20 = paddle._C_ops.transpose(reshape_46, [0, 1, 3, 2, 4, 5]) + del reshape_46 + + # pd_op.reshape: (-1x12x12x256xf32) <- (-1x4x4x12x12x256xf32, 4xi64) + reshape_47 = paddle._C_ops.reshape(transpose_20, full_int_array_33) + del transpose_20 + + # pd_op.reshape: (-1x144x256xf32) <- (-1x12x12x256xf32, 3xi64) + reshape_48 = paddle._C_ops.reshape(reshape_47, full_int_array_34) + del full_int_array_34, reshape_47 + + # pd_op.full: (1x48x48x1xf32) <- () + full_22 = paddle._C_ops.full( + [1, 48, 48, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x48x48x1xf32) <- (1x48x48x1xf32, 2xi64, 2xi64, 2xi64) + set_value__20 = paddle._C_ops.set_value_( + full_22, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_22 + + # pd_op.set_value_: (1x48x48x1xf32) <- (1x48x48x1xf32, 2xi64, 2xi64, 2xi64) + set_value__21 = paddle._C_ops.set_value_( + set_value__20, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__20 + + # pd_op.set_value_: (1x48x48x1xf32) <- (1x48x48x1xf32, 2xi64, 2xi64, 2xi64) + set_value__22 = paddle._C_ops.set_value_( + set_value__21, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__21 + + # pd_op.set_value_: (1x48x48x1xf32) <- (1x48x48x1xf32, 2xi64, 2xi64, 2xi64) + set_value__23 = paddle._C_ops.set_value_( + set_value__22, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__22 + + # pd_op.set_value_: (1x48x48x1xf32) <- (1x48x48x1xf32, 2xi64, 2xi64, 2xi64) + set_value__24 = paddle._C_ops.set_value_( + set_value__23, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__23 + + # pd_op.set_value_: (1x48x48x1xf32) <- (1x48x48x1xf32, 2xi64, 2xi64, 2xi64) + set_value__25 = paddle._C_ops.set_value_( + set_value__24, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__24 + + # pd_op.set_value_: (1x48x48x1xf32) <- (1x48x48x1xf32, 2xi64, 2xi64, 2xi64) + set_value__26 = paddle._C_ops.set_value_( + set_value__25, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__25 + + # pd_op.set_value_: (1x48x48x1xf32) <- (1x48x48x1xf32, 2xi64, 2xi64, 2xi64) + set_value__27 = paddle._C_ops.set_value_( + set_value__26, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__26 + + # pd_op.set_value_: (1x48x48x1xf32) <- (1x48x48x1xf32, 2xi64, 2xi64, 2xi64) + set_value__1 = paddle._C_ops.set_value_( + set_value__27, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__27 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_37 = [1, 4, 12, 4, 12, 1] + + # pd_op.reshape: (1x4x12x4x12x1xf32) <- (1x48x48x1xf32, 6xi64) + reshape_49 = paddle._C_ops.reshape(set_value__1, full_int_array_37) + del full_int_array_37 + + # pd_op.transpose: (1x4x4x12x12x1xf32) <- (1x4x12x4x12x1xf32) + transpose_21 = paddle._C_ops.transpose(reshape_49, [0, 1, 3, 2, 4, 5]) + del reshape_49 + + # pd_op.reshape: (16x12x12x1xf32) <- (1x4x4x12x12x1xf32, 4xi64) + reshape_50 = paddle._C_ops.reshape(transpose_21, full_int_array_27) + del transpose_21 + + # pd_op.reshape: (16x144xf32) <- (16x12x12x1xf32, 2xi64) + reshape_51 = paddle._C_ops.reshape(reshape_50, full_int_array_28) + del reshape_50 + + # pd_op.unsqueeze: (16x1x144xf32) <- (16x144xf32, 1xi64) + unsqueeze_7 = paddle._C_ops.unsqueeze(reshape_51, full_int_array_1) + + # pd_op.unsqueeze: (16x144x1xf32) <- (16x144xf32, 1xi64) + unsqueeze_8 = paddle._C_ops.unsqueeze(reshape_51, full_int_array_5) + del reshape_51 + + # pd_op.subtract: (16x144x144xf32) <- (16x1x144xf32, 16x144x1xf32) + subtract_1 = paddle._C_ops.subtract(unsqueeze_7, unsqueeze_8) + del unsqueeze_7, unsqueeze_8 + + # pd_op.not_equal: (16x144x144xb) <- (16x144x144xf32, xf32) + not_equal_1 = paddle._C_ops.not_equal(subtract_1, full_11) + + # pd_op.full: (16x144x144xf32) <- () + full_23 = paddle._C_ops.full( + [16, 144, 144], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (16x144x144xf32) <- (16x144x144xb, 16x144x144xf32, 16x144x144xf32) + where_2 = paddle._C_ops.where(not_equal_1, full_23, subtract_1) + del full_23, not_equal_1, subtract_1 + + # pd_op.equal: (16x144x144xb) <- (16x144x144xf32, xf32) + equal_1 = paddle._C_ops.equal(where_2, full_11) + + # pd_op.full: (16x144x144xf32) <- () + full_24 = paddle._C_ops.full( + [16, 144, 144], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (16x144x144xf32) <- (16x144x144xb, 16x144x144xf32, 16x144x144xf32) + where_3 = paddle._C_ops.where(equal_1, full_24, where_2) + del equal_1, full_24, where_2 + + # pd_op.shape64: (3xi64) <- (-1x144x256xf32) + shape64_18 = paddle._C_ops.shape64(reshape_48) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_27 = paddle._C_ops.slice( + shape64_18, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_18 + + # pd_op.matmul: (-1x144x768xf32) <- (-1x144x256xf32, 256x768xf32) + matmul_19 = paddle._C_ops.matmul(reshape_48, parameter_259, False, False) + del parameter_259, reshape_48 + + # pd_op.add: (-1x144x768xf32) <- (-1x144x768xf32, 768xf32) + add_24 = paddle._C_ops.add(matmul_19, parameter_258) + del matmul_19, parameter_258 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_23 = [slice_27, full_4, full_5, full_2, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_22 = paddle._C_ops.stack(combine_23, 0) + del combine_23 + + # pd_op.reshape: (-1x144x3x8x32xf32) <- (-1x144x768xf32, 5xi64) + reshape_52 = paddle._C_ops.reshape(add_24, stack_22) + del add_24, stack_22 + + # pd_op.transpose: (3x-1x8x144x32xf32) <- (-1x144x3x8x32xf32) + transpose_22 = paddle._C_ops.transpose(reshape_52, [2, 0, 3, 1, 4]) + del reshape_52 + + # pd_op.slice: (-1x8x144x32xf32) <- (3x-1x8x144x32xf32, 1xi64, 1xi64) + slice_28 = paddle._C_ops.slice( + transpose_22, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x8x144x32xf32) <- (3x-1x8x144x32xf32, 1xi64, 1xi64) + slice_29 = paddle._C_ops.slice( + transpose_22, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x8x144x32xf32) <- (3x-1x8x144x32xf32, 1xi64, 1xi64) + slice_30 = paddle._C_ops.slice( + transpose_22, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_22 + + # pd_op.scale: (-1x8x144x32xf32) <- (-1x8x144x32xf32, 1xf32) + scale_3 = paddle._C_ops.scale(slice_28, full_8, float("0"), True) + del slice_28 + + # pd_op.transpose: (-1x8x32x144xf32) <- (-1x8x144x32xf32) + transpose_23 = paddle._C_ops.transpose(slice_29, [0, 1, 3, 2]) + del slice_29 + + # pd_op.matmul: (-1x8x144x144xf32) <- (-1x8x144x32xf32, -1x8x32x144xf32) + matmul_20 = paddle._C_ops.matmul(scale_3, transpose_23, False, False) + del scale_3, transpose_23 + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_53 = paddle._C_ops.reshape(data_7, full_int_array_7) + del data_7 + + # pd_op.index_select: (20736x8xf32) <- (529x8xf32, 20736xi64) + index_select_3 = paddle._C_ops.index_select(data_8, reshape_53, 0) + del data_8, reshape_53 + + # pd_op.reshape: (144x144x8xf32) <- (20736x8xf32, 3xi64) + reshape_54 = paddle._C_ops.reshape(index_select_3, full_int_array_8) + del index_select_3 + + # pd_op.transpose: (8x144x144xf32) <- (144x144x8xf32) + transpose_24 = paddle._C_ops.transpose(reshape_54, [2, 0, 1]) + del reshape_54 + + # pd_op.unsqueeze: (1x8x144x144xf32) <- (8x144x144xf32, 1xi64) + unsqueeze_9 = paddle._C_ops.unsqueeze(transpose_24, full_int_array_0) + del transpose_24 + + # pd_op.add: (-1x8x144x144xf32) <- (-1x8x144x144xf32, 1x8x144x144xf32) + add_25 = paddle._C_ops.add(matmul_20, unsqueeze_9) + del matmul_20, unsqueeze_9 + + # pd_op.full: (xi64) <- () + full_25 = paddle._C_ops.full( + [], float("16"), paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_1 = paddle._C_ops.floor_divide(slice_27, full_25) + del full_25 + + # pd_op.full: (xi64) <- () + full_26 = paddle._C_ops.full( + [], float("16"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_24 = [floor_divide_1, full_26, full_2, full_4, full_4] + del floor_divide_1 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_23 = paddle._C_ops.stack(combine_24, 0) + del combine_24 + + # pd_op.reshape: (-1x16x8x144x144xf32) <- (-1x8x144x144xf32, 5xi64) + reshape_55 = paddle._C_ops.reshape(add_25, stack_23) + del add_25, stack_23 + + # pd_op.unsqueeze: (16x1x144x144xf32) <- (16x144x144xf32, 1xi64) + unsqueeze_10 = paddle._C_ops.unsqueeze(where_3, full_int_array_1) + del where_3 + + # pd_op.unsqueeze: (1x16x1x144x144xf32) <- (16x1x144x144xf32, 1xi64) + unsqueeze_11 = paddle._C_ops.unsqueeze(unsqueeze_10, full_int_array_0) + del unsqueeze_10 + + # pd_op.add: (-1x16x8x144x144xf32) <- (-1x16x8x144x144xf32, 1x16x1x144x144xf32) + add_26 = paddle._C_ops.add(reshape_55, unsqueeze_11) + del reshape_55, unsqueeze_11 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_25 = [slice_27, full_2, full_4, full_4] + del full_2 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_24 = paddle._C_ops.stack(combine_25, 0) + del combine_25 + + # pd_op.reshape: (-1x8x144x144xf32) <- (-1x16x8x144x144xf32, 4xi64) + reshape_56 = paddle._C_ops.reshape(add_26, stack_24) + del add_26, stack_24 + + # pd_op.softmax: (-1x8x144x144xf32) <- (-1x8x144x144xf32) + softmax_3 = paddle._C_ops.softmax(reshape_56, -1) + del reshape_56 + + # pd_op.matmul: (-1x8x144x32xf32) <- (-1x8x144x144xf32, -1x8x144x32xf32) + matmul_21 = paddle._C_ops.matmul(softmax_3, slice_30, False, False) + del slice_30, softmax_3 + + # pd_op.transpose: (-1x144x8x32xf32) <- (-1x8x144x32xf32) + transpose_25 = paddle._C_ops.transpose(matmul_21, [0, 2, 1, 3]) + del matmul_21 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_26 = [slice_27, full_4, full_20] + del slice_27 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_25 = paddle._C_ops.stack(combine_26, 0) + del combine_26 + + # pd_op.reshape: (-1x144x256xf32) <- (-1x144x8x32xf32, 3xi64) + reshape_57 = paddle._C_ops.reshape(transpose_25, stack_25) + del stack_25, transpose_25 + + # pd_op.matmul: (-1x144x256xf32) <- (-1x144x256xf32, 256x256xf32) + matmul_22 = paddle._C_ops.matmul(reshape_57, parameter_257, False, False) + del parameter_257, reshape_57 + + # pd_op.add: (-1x144x256xf32) <- (-1x144x256xf32, 256xf32) + add_27 = paddle._C_ops.add(matmul_22, parameter_256) + del matmul_22, parameter_256 + + # pd_op.reshape: (-1x12x12x256xf32) <- (-1x144x256xf32, 4xi64) + reshape_58 = paddle._C_ops.reshape(add_27, full_int_array_33) + del add_27, full_int_array_33 + + # pd_op.reshape: (-1x4x4x12x12x256xf32) <- (-1x12x12x256xf32, 6xi64) + reshape_59 = paddle._C_ops.reshape(reshape_58, full_int_array_35) + del full_int_array_35, reshape_58 + + # pd_op.transpose: (-1x4x12x4x12x256xf32) <- (-1x4x4x12x12x256xf32) + transpose_26 = paddle._C_ops.transpose(reshape_59, [0, 1, 3, 2, 4, 5]) + del reshape_59 + + # pd_op.reshape: (-1x48x48x256xf32) <- (-1x4x12x4x12x256xf32, 4xi64) + reshape_60 = paddle._C_ops.reshape(transpose_26, full_int_array_36) + del full_int_array_36, transpose_26 + + # pd_op.roll: (-1x48x48x256xf32) <- (-1x48x48x256xf32, 2xi64) + roll_3 = paddle._C_ops.roll(reshape_60, full_int_array_29, [1, 2]) + del reshape_60 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_27 = [slice_24, full_21, full_20] + del full_21, slice_24 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_26 = paddle._C_ops.stack(combine_27, 0) + del combine_27 + + # pd_op.reshape: (-1x2304x256xf32) <- (-1x48x48x256xf32, 3xi64) + reshape_61 = paddle._C_ops.reshape(roll_3, stack_26) + del roll_3, stack_26 + + # pd_op.add: (-1x2304x256xf32) <- (-1x2304x256xf32, -1x2304x256xf32) + add_28 = paddle._C_ops.add(add_23, reshape_61) + del add_23, reshape_61 + + # pd_op.layer_norm: (-1x2304x256xf32, -1x2304xf32, -1x2304xf32) <- (-1x2304x256xf32, 256xf32, 256xf32) + layer_norm_27, layer_norm_28, layer_norm_29 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_28, parameter_255, parameter_254, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_254, parameter_255 + + # pd_op.matmul: (-1x2304x1024xf32) <- (-1x2304x256xf32, 256x1024xf32) + matmul_23 = paddle._C_ops.matmul(layer_norm_27, parameter_253, False, False) + del layer_norm_27, parameter_253 + + # pd_op.add: (-1x2304x1024xf32) <- (-1x2304x1024xf32, 1024xf32) + add_29 = paddle._C_ops.add(matmul_23, parameter_252) + del matmul_23, parameter_252 + + # pd_op.gelu: (-1x2304x1024xf32) <- (-1x2304x1024xf32) + gelu_3 = paddle._C_ops.gelu(add_29, False) + del add_29 + + # pd_op.matmul: (-1x2304x256xf32) <- (-1x2304x1024xf32, 1024x256xf32) + matmul_24 = paddle._C_ops.matmul(gelu_3, parameter_251, False, False) + del gelu_3, parameter_251 + + # pd_op.add: (-1x2304x256xf32) <- (-1x2304x256xf32, 256xf32) + add_30 = paddle._C_ops.add(matmul_24, parameter_250) + del matmul_24, parameter_250 + + # pd_op.add: (-1x2304x256xf32) <- (-1x2304x256xf32, -1x2304x256xf32) + add_31 = paddle._C_ops.add(add_28, add_30) + del add_28, add_30 + + # pd_op.shape64: (3xi64) <- (-1x2304x256xf32) + shape64_19 = paddle._C_ops.shape64(add_31) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_31 = paddle._C_ops.slice( + shape64_19, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_19 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_28 = [slice_31, full_19, full_19, full_20] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_27 = paddle._C_ops.stack(combine_28, 0) + del combine_28 + + # pd_op.reshape: (-1x48x48x256xf32) <- (-1x2304x256xf32, 4xi64) + reshape_62 = paddle._C_ops.reshape(add_31, stack_27) + del add_31, stack_27 + + # pd_op.strided_slice: (-1x24x24x256xf32) <- (-1x48x48x256xf32, 2xi64, 2xi64, 2xi64) + strided_slice_4 = paddle._C_ops.strided_slice( + reshape_62, [1, 2], full_int_array_12, full_int_array_25, full_int_array_30 + ) + + # pd_op.strided_slice: (-1x24x24x256xf32) <- (-1x48x48x256xf32, 2xi64, 2xi64, 2xi64) + strided_slice_5 = paddle._C_ops.strided_slice( + reshape_62, [1, 2], full_int_array_31, full_int_array_25, full_int_array_30 + ) + + # pd_op.strided_slice: (-1x24x24x256xf32) <- (-1x48x48x256xf32, 2xi64, 2xi64, 2xi64) + strided_slice_6 = paddle._C_ops.strided_slice( + reshape_62, [1, 2], full_int_array_32, full_int_array_25, full_int_array_30 + ) + + # pd_op.strided_slice: (-1x24x24x256xf32) <- (-1x48x48x256xf32, 2xi64, 2xi64, 2xi64) + strided_slice_7 = paddle._C_ops.strided_slice( + reshape_62, [1, 2], full_int_array_14, full_int_array_25, full_int_array_30 + ) + + # pd_op.shape64: (4xi64) <- (-1x48x48x256xf32) + shape64_20 = paddle._C_ops.shape64(reshape_62) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_32 = paddle._C_ops.slice( + shape64_20, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_20 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_29 = [slice_32, full_19, full_19, full_20] + del full_19, full_20, slice_32 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_28 = paddle._C_ops.stack(combine_29, 0) + del combine_29 + + # pd_op.reshape: (-1x48x48x256xf32) <- (-1x48x48x256xf32, 4xi64) + reshape_63 = paddle._C_ops.reshape(reshape_62, stack_28) + del reshape_62, stack_28 + + # builtin.combine: ([-1x24x24x256xf32, -1x24x24x256xf32, -1x24x24x256xf32, -1x24x24x256xf32]) <- (-1x24x24x256xf32, -1x24x24x256xf32, -1x24x24x256xf32, -1x24x24x256xf32) + combine_30 = [ + strided_slice_4, + strided_slice_5, + strided_slice_6, + strided_slice_7, + ] + del strided_slice_4, strided_slice_5, strided_slice_6, strided_slice_7 + + # pd_op.concat: (-1x24x24x1024xf32) <- ([-1x24x24x256xf32, -1x24x24x256xf32, -1x24x24x256xf32, -1x24x24x256xf32], 1xi32) + concat_1 = paddle._C_ops.concat(combine_30, full_16) + del combine_30 + + # pd_op.full: (xi64) <- () + full_27 = paddle._C_ops.full( + [], float("1024"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_31 = [slice_31, full_17, full_27] + del slice_31 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_29 = paddle._C_ops.stack(combine_31, 0) + del combine_31 + + # pd_op.reshape: (-1x-1x1024xf32) <- (-1x24x24x1024xf32, 3xi64) + reshape_64 = paddle._C_ops.reshape(concat_1, stack_29) + del concat_1, stack_29 + + # pd_op.layer_norm: (-1x-1x1024xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x1024xf32, 1024xf32, 1024xf32) + layer_norm_30, layer_norm_31, layer_norm_32 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + reshape_64, parameter_249, parameter_248, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_248, parameter_249, reshape_64 + + # pd_op.matmul: (-1x-1x512xf32) <- (-1x-1x1024xf32, 1024x512xf32) + matmul_25 = paddle._C_ops.matmul(layer_norm_30, parameter_247, False, False) + del layer_norm_30, parameter_247 + + # pd_op.shape64: (3xi64) <- (-1x-1x512xf32) + shape64_21 = paddle._C_ops.shape64(matmul_25) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_33 = paddle._C_ops.slice( + shape64_21, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_21 + + # pd_op.shape64: (3xi64) <- (-1x-1x512xf32) + shape64_22 = paddle._C_ops.shape64(matmul_25) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_34 = paddle._C_ops.slice( + shape64_22, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + del shape64_22 + + # pd_op.layer_norm: (-1x-1x512xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x512xf32, 512xf32, 512xf32) + layer_norm_33, layer_norm_34, layer_norm_35 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + matmul_25, parameter_246, parameter_245, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_245, parameter_246 + + # pd_op.full: (xi64) <- () + full_28 = paddle._C_ops.full( + [], float("24"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_32 = [slice_33, full_28, full_28, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_30 = paddle._C_ops.stack(combine_32, 0) + del combine_32 + + # pd_op.reshape: (-1x24x24x512xf32) <- (-1x-1x512xf32, 4xi64) + reshape_65 = paddle._C_ops.reshape(layer_norm_33, stack_30) + del layer_norm_33, stack_30 + + # pd_op.shape64: (4xi64) <- (-1x24x24x512xf32) + shape64_23 = paddle._C_ops.shape64(reshape_65) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_35 = paddle._C_ops.slice( + shape64_23, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_23 + + # pd_op.full: (xi64) <- () + full_29 = paddle._C_ops.full( + [], float("2"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_33 = [slice_35, full_29, full_3, full_29, full_3, full_18] + del slice_35 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_31 = paddle._C_ops.stack(combine_33, 0) + del combine_33 + + # pd_op.reshape: (-1x2x12x2x12x512xf32) <- (-1x24x24x512xf32, 6xi64) + reshape_66 = paddle._C_ops.reshape(reshape_65, stack_31) + del reshape_65, stack_31 + + # pd_op.transpose: (-1x2x2x12x12x512xf32) <- (-1x2x12x2x12x512xf32) + transpose_27 = paddle._C_ops.transpose(reshape_66, [0, 1, 3, 2, 4, 5]) + del reshape_66 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_38 = [-1, 12, 12, 512] + + # pd_op.reshape: (-1x12x12x512xf32) <- (-1x2x2x12x12x512xf32, 4xi64) + reshape_67 = paddle._C_ops.reshape(transpose_27, full_int_array_38) + del transpose_27 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_39 = [-1, 144, 512] + + # pd_op.reshape: (-1x144x512xf32) <- (-1x12x12x512xf32, 3xi64) + reshape_68 = paddle._C_ops.reshape(reshape_67, full_int_array_39) + del reshape_67 + + # pd_op.shape64: (3xi64) <- (-1x144x512xf32) + shape64_24 = paddle._C_ops.shape64(reshape_68) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_36 = paddle._C_ops.slice( + shape64_24, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_24 + + # pd_op.matmul: (-1x144x1536xf32) <- (-1x144x512xf32, 512x1536xf32) + matmul_26 = paddle._C_ops.matmul(reshape_68, parameter_244, False, False) + del parameter_244, reshape_68 + + # pd_op.add: (-1x144x1536xf32) <- (-1x144x1536xf32, 1536xf32) + add_32 = paddle._C_ops.add(matmul_26, parameter_243) + del matmul_26, parameter_243 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_34 = [slice_36, full_4, full_5, full_26, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_32 = paddle._C_ops.stack(combine_34, 0) + del combine_34 + + # pd_op.reshape: (-1x144x3x16x32xf32) <- (-1x144x1536xf32, 5xi64) + reshape_69 = paddle._C_ops.reshape(add_32, stack_32) + del add_32, stack_32 + + # pd_op.transpose: (3x-1x16x144x32xf32) <- (-1x144x3x16x32xf32) + transpose_28 = paddle._C_ops.transpose(reshape_69, [2, 0, 3, 1, 4]) + del reshape_69 + + # pd_op.slice: (-1x16x144x32xf32) <- (3x-1x16x144x32xf32, 1xi64, 1xi64) + slice_37 = paddle._C_ops.slice( + transpose_28, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x16x144x32xf32) <- (3x-1x16x144x32xf32, 1xi64, 1xi64) + slice_38 = paddle._C_ops.slice( + transpose_28, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x16x144x32xf32) <- (3x-1x16x144x32xf32, 1xi64, 1xi64) + slice_39 = paddle._C_ops.slice( + transpose_28, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_28 + + # pd_op.scale: (-1x16x144x32xf32) <- (-1x16x144x32xf32, 1xf32) + scale_4 = paddle._C_ops.scale(slice_37, full_8, float("0"), True) + del slice_37 + + # pd_op.transpose: (-1x16x32x144xf32) <- (-1x16x144x32xf32) + transpose_29 = paddle._C_ops.transpose(slice_38, [0, 1, 3, 2]) + del slice_38 + + # pd_op.matmul: (-1x16x144x144xf32) <- (-1x16x144x32xf32, -1x16x32x144xf32) + matmul_27 = paddle._C_ops.matmul(scale_4, transpose_29, False, False) + del scale_4, transpose_29 + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_70 = paddle._C_ops.reshape(data_9, full_int_array_7) + del data_9 + + # pd_op.index_select: (20736x16xf32) <- (529x16xf32, 20736xi64) + index_select_4 = paddle._C_ops.index_select(data_10, reshape_70, 0) + del data_10, reshape_70 + + # pd_op.reshape: (144x144x16xf32) <- (20736x16xf32, 3xi64) + reshape_71 = paddle._C_ops.reshape(index_select_4, full_int_array_8) + del index_select_4 + + # pd_op.transpose: (16x144x144xf32) <- (144x144x16xf32) + transpose_30 = paddle._C_ops.transpose(reshape_71, [2, 0, 1]) + del reshape_71 + + # pd_op.unsqueeze: (1x16x144x144xf32) <- (16x144x144xf32, 1xi64) + unsqueeze_12 = paddle._C_ops.unsqueeze(transpose_30, full_int_array_0) + del transpose_30 + + # pd_op.add: (-1x16x144x144xf32) <- (-1x16x144x144xf32, 1x16x144x144xf32) + add_33 = paddle._C_ops.add(matmul_27, unsqueeze_12) + del matmul_27, unsqueeze_12 + + # pd_op.softmax: (-1x16x144x144xf32) <- (-1x16x144x144xf32) + softmax_4 = paddle._C_ops.softmax(add_33, -1) + del add_33 + + # pd_op.matmul: (-1x16x144x32xf32) <- (-1x16x144x144xf32, -1x16x144x32xf32) + matmul_28 = paddle._C_ops.matmul(softmax_4, slice_39, False, False) + del slice_39, softmax_4 + + # pd_op.transpose: (-1x144x16x32xf32) <- (-1x16x144x32xf32) + transpose_31 = paddle._C_ops.transpose(matmul_28, [0, 2, 1, 3]) + del matmul_28 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_35 = [slice_36, full_4, full_18] + del slice_36 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_33 = paddle._C_ops.stack(combine_35, 0) + del combine_35 + + # pd_op.reshape: (-1x144x512xf32) <- (-1x144x16x32xf32, 3xi64) + reshape_72 = paddle._C_ops.reshape(transpose_31, stack_33) + del stack_33, transpose_31 + + # pd_op.matmul: (-1x144x512xf32) <- (-1x144x512xf32, 512x512xf32) + matmul_29 = paddle._C_ops.matmul(reshape_72, parameter_242, False, False) + del parameter_242, reshape_72 + + # pd_op.add: (-1x144x512xf32) <- (-1x144x512xf32, 512xf32) + add_34 = paddle._C_ops.add(matmul_29, parameter_241) + del matmul_29, parameter_241 + + # pd_op.reshape: (-1x12x12x512xf32) <- (-1x144x512xf32, 4xi64) + reshape_73 = paddle._C_ops.reshape(add_34, full_int_array_38) + del add_34 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_40 = [-1, 2, 2, 12, 12, 512] + + # pd_op.reshape: (-1x2x2x12x12x512xf32) <- (-1x12x12x512xf32, 6xi64) + reshape_74 = paddle._C_ops.reshape(reshape_73, full_int_array_40) + del reshape_73 + + # pd_op.transpose: (-1x2x12x2x12x512xf32) <- (-1x2x2x12x12x512xf32) + transpose_32 = paddle._C_ops.transpose(reshape_74, [0, 1, 3, 2, 4, 5]) + del reshape_74 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_41 = [-1, 24, 24, 512] + + # pd_op.reshape: (-1x24x24x512xf32) <- (-1x2x12x2x12x512xf32, 4xi64) + reshape_75 = paddle._C_ops.reshape(transpose_32, full_int_array_41) + del transpose_32 + + # pd_op.full: (xi64) <- () + full_30 = paddle._C_ops.full( + [], float("576"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_36 = [slice_33, full_30, full_18] + del slice_33 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_34 = paddle._C_ops.stack(combine_36, 0) + del combine_36 + + # pd_op.reshape: (-1x576x512xf32) <- (-1x24x24x512xf32, 3xi64) + reshape_76 = paddle._C_ops.reshape(reshape_75, stack_34) + del reshape_75, stack_34 + + # pd_op.add: (-1x576x512xf32) <- (-1x-1x512xf32, -1x576x512xf32) + add_35 = paddle._C_ops.add(matmul_25, reshape_76) + del matmul_25, reshape_76 + + # pd_op.layer_norm: (-1x576x512xf32, -1x576xf32, -1x576xf32) <- (-1x576x512xf32, 512xf32, 512xf32) + layer_norm_36, layer_norm_37, layer_norm_38 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_35, parameter_240, parameter_239, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_239, parameter_240 + + # pd_op.matmul: (-1x576x2048xf32) <- (-1x576x512xf32, 512x2048xf32) + matmul_30 = paddle._C_ops.matmul(layer_norm_36, parameter_238, False, False) + del layer_norm_36, parameter_238 + + # pd_op.add: (-1x576x2048xf32) <- (-1x576x2048xf32, 2048xf32) + add_36 = paddle._C_ops.add(matmul_30, parameter_237) + del matmul_30, parameter_237 + + # pd_op.gelu: (-1x576x2048xf32) <- (-1x576x2048xf32) + gelu_4 = paddle._C_ops.gelu(add_36, False) + del add_36 + + # pd_op.matmul: (-1x576x512xf32) <- (-1x576x2048xf32, 2048x512xf32) + matmul_31 = paddle._C_ops.matmul(gelu_4, parameter_236, False, False) + del gelu_4, parameter_236 + + # pd_op.add: (-1x576x512xf32) <- (-1x576x512xf32, 512xf32) + add_37 = paddle._C_ops.add(matmul_31, parameter_235) + del matmul_31, parameter_235 + + # pd_op.add: (-1x576x512xf32) <- (-1x576x512xf32, -1x576x512xf32) + add_38 = paddle._C_ops.add(add_35, add_37) + del add_35, add_37 + + # pd_op.shape64: (3xi64) <- (-1x576x512xf32) + shape64_25 = paddle._C_ops.shape64(add_38) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_40 = paddle._C_ops.slice( + shape64_25, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_25 + + # pd_op.layer_norm: (-1x576x512xf32, -1x576xf32, -1x576xf32) <- (-1x576x512xf32, 512xf32, 512xf32) + layer_norm_39, layer_norm_40, layer_norm_41 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_38, parameter_234, parameter_233, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_233, parameter_234 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_37 = [slice_40, full_28, full_28, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_35 = paddle._C_ops.stack(combine_37, 0) + del combine_37 + + # pd_op.reshape: (-1x24x24x512xf32) <- (-1x576x512xf32, 4xi64) + reshape_77 = paddle._C_ops.reshape(layer_norm_39, stack_35) + del layer_norm_39, stack_35 + + # pd_op.shape64: (4xi64) <- (-1x24x24x512xf32) + shape64_26 = paddle._C_ops.shape64(reshape_77) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_41 = paddle._C_ops.slice( + shape64_26, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_26 + + # pd_op.roll: (-1x24x24x512xf32) <- (-1x24x24x512xf32, 2xi64) + roll_4 = paddle._C_ops.roll(reshape_77, full_int_array_11, [1, 2]) + del reshape_77 + + # pd_op.shape64: (4xi64) <- (-1x24x24x512xf32) + shape64_27 = paddle._C_ops.shape64(roll_4) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_42 = paddle._C_ops.slice( + shape64_27, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_27 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_38 = [slice_42, full_29, full_3, full_29, full_3, full_18] + del slice_42 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_36 = paddle._C_ops.stack(combine_38, 0) + del combine_38 + + # pd_op.reshape: (-1x2x12x2x12x512xf32) <- (-1x24x24x512xf32, 6xi64) + reshape_78 = paddle._C_ops.reshape(roll_4, stack_36) + del roll_4, stack_36 + + # pd_op.transpose: (-1x2x2x12x12x512xf32) <- (-1x2x12x2x12x512xf32) + transpose_33 = paddle._C_ops.transpose(reshape_78, [0, 1, 3, 2, 4, 5]) + del reshape_78 + + # pd_op.reshape: (-1x12x12x512xf32) <- (-1x2x2x12x12x512xf32, 4xi64) + reshape_79 = paddle._C_ops.reshape(transpose_33, full_int_array_38) + del transpose_33 + + # pd_op.reshape: (-1x144x512xf32) <- (-1x12x12x512xf32, 3xi64) + reshape_80 = paddle._C_ops.reshape(reshape_79, full_int_array_39) + del reshape_79 + + # pd_op.full: (1x24x24x1xf32) <- () + full_31 = paddle._C_ops.full( + [1, 24, 24, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__28 = paddle._C_ops.set_value_( + full_31, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_31 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__29 = paddle._C_ops.set_value_( + set_value__28, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__28 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__30 = paddle._C_ops.set_value_( + set_value__29, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__29 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__31 = paddle._C_ops.set_value_( + set_value__30, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__30 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__32 = paddle._C_ops.set_value_( + set_value__31, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__31 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__33 = paddle._C_ops.set_value_( + set_value__32, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__32 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__34 = paddle._C_ops.set_value_( + set_value__33, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__33 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__35 = paddle._C_ops.set_value_( + set_value__34, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__34 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__2 = paddle._C_ops.set_value_( + set_value__35, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__35 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_42 = [1, 2, 12, 2, 12, 1] + + # pd_op.reshape: (1x2x12x2x12x1xf32) <- (1x24x24x1xf32, 6xi64) + reshape_81 = paddle._C_ops.reshape(set_value__2, full_int_array_42) + + # pd_op.transpose: (1x2x2x12x12x1xf32) <- (1x2x12x2x12x1xf32) + transpose_34 = paddle._C_ops.transpose(reshape_81, [0, 1, 3, 2, 4, 5]) + del reshape_81 + + # pd_op.reshape: (4x12x12x1xf32) <- (1x2x2x12x12x1xf32, 4xi64) + reshape_82 = paddle._C_ops.reshape(transpose_34, full_int_array_27) + del transpose_34 + + # pd_op.reshape: (4x144xf32) <- (4x12x12x1xf32, 2xi64) + reshape_83 = paddle._C_ops.reshape(reshape_82, full_int_array_28) + del reshape_82 + + # pd_op.unsqueeze: (4x1x144xf32) <- (4x144xf32, 1xi64) + unsqueeze_13 = paddle._C_ops.unsqueeze(reshape_83, full_int_array_1) + + # pd_op.unsqueeze: (4x144x1xf32) <- (4x144xf32, 1xi64) + unsqueeze_14 = paddle._C_ops.unsqueeze(reshape_83, full_int_array_5) + del reshape_83 + + # pd_op.subtract: (4x144x144xf32) <- (4x1x144xf32, 4x144x1xf32) + subtract_2 = paddle._C_ops.subtract(unsqueeze_13, unsqueeze_14) + del unsqueeze_13, unsqueeze_14 + + # pd_op.not_equal: (4x144x144xb) <- (4x144x144xf32, xf32) + not_equal_2 = paddle._C_ops.not_equal(subtract_2, full_11) + + # pd_op.full: (4x144x144xf32) <- () + full_32 = paddle._C_ops.full( + [4, 144, 144], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_4 = paddle._C_ops.where(not_equal_2, full_32, subtract_2) + del not_equal_2, subtract_2 + + # pd_op.equal: (4x144x144xb) <- (4x144x144xf32, xf32) + equal_2 = paddle._C_ops.equal(where_4, full_11) + + # pd_op.full: (4x144x144xf32) <- () + full_33 = paddle._C_ops.full( + [4, 144, 144], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_5 = paddle._C_ops.where(equal_2, full_33, where_4) + del equal_2, where_4 + + # pd_op.shape64: (3xi64) <- (-1x144x512xf32) + shape64_28 = paddle._C_ops.shape64(reshape_80) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_43 = paddle._C_ops.slice( + shape64_28, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_28 + + # pd_op.matmul: (-1x144x1536xf32) <- (-1x144x512xf32, 512x1536xf32) + matmul_32 = paddle._C_ops.matmul(reshape_80, parameter_232, False, False) + del parameter_232, reshape_80 + + # pd_op.add: (-1x144x1536xf32) <- (-1x144x1536xf32, 1536xf32) + add_39 = paddle._C_ops.add(matmul_32, parameter_231) + del matmul_32, parameter_231 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_39 = [slice_43, full_4, full_5, full_26, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_37 = paddle._C_ops.stack(combine_39, 0) + del combine_39 + + # pd_op.reshape: (-1x144x3x16x32xf32) <- (-1x144x1536xf32, 5xi64) + reshape_84 = paddle._C_ops.reshape(add_39, stack_37) + del add_39, stack_37 + + # pd_op.transpose: (3x-1x16x144x32xf32) <- (-1x144x3x16x32xf32) + transpose_35 = paddle._C_ops.transpose(reshape_84, [2, 0, 3, 1, 4]) + del reshape_84 + + # pd_op.slice: (-1x16x144x32xf32) <- (3x-1x16x144x32xf32, 1xi64, 1xi64) + slice_44 = paddle._C_ops.slice( + transpose_35, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x16x144x32xf32) <- (3x-1x16x144x32xf32, 1xi64, 1xi64) + slice_45 = paddle._C_ops.slice( + transpose_35, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x16x144x32xf32) <- (3x-1x16x144x32xf32, 1xi64, 1xi64) + slice_46 = paddle._C_ops.slice( + transpose_35, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_35 + + # pd_op.scale: (-1x16x144x32xf32) <- (-1x16x144x32xf32, 1xf32) + scale_5 = paddle._C_ops.scale(slice_44, full_8, float("0"), True) + del slice_44 + + # pd_op.transpose: (-1x16x32x144xf32) <- (-1x16x144x32xf32) + transpose_36 = paddle._C_ops.transpose(slice_45, [0, 1, 3, 2]) + del slice_45 + + # pd_op.matmul: (-1x16x144x144xf32) <- (-1x16x144x32xf32, -1x16x32x144xf32) + matmul_33 = paddle._C_ops.matmul(scale_5, transpose_36, False, False) + del scale_5, transpose_36 + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_85 = paddle._C_ops.reshape(data_11, full_int_array_7) + del data_11 + + # pd_op.index_select: (20736x16xf32) <- (529x16xf32, 20736xi64) + index_select_5 = paddle._C_ops.index_select(data_12, reshape_85, 0) + del data_12, reshape_85 + + # pd_op.reshape: (144x144x16xf32) <- (20736x16xf32, 3xi64) + reshape_86 = paddle._C_ops.reshape(index_select_5, full_int_array_8) + del index_select_5 + + # pd_op.transpose: (16x144x144xf32) <- (144x144x16xf32) + transpose_37 = paddle._C_ops.transpose(reshape_86, [2, 0, 1]) + del reshape_86 + + # pd_op.unsqueeze: (1x16x144x144xf32) <- (16x144x144xf32, 1xi64) + unsqueeze_15 = paddle._C_ops.unsqueeze(transpose_37, full_int_array_0) + del transpose_37 + + # pd_op.add: (-1x16x144x144xf32) <- (-1x16x144x144xf32, 1x16x144x144xf32) + add_40 = paddle._C_ops.add(matmul_33, unsqueeze_15) + del matmul_33, unsqueeze_15 + + # pd_op.full: (xi64) <- () + full_34 = paddle._C_ops.full( + [], float("4"), paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_2 = paddle._C_ops.floor_divide(slice_43, full_34) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_40 = [floor_divide_2, full_6, full_26, full_4, full_4] + del floor_divide_2 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_38 = paddle._C_ops.stack(combine_40, 0) + del combine_40 + + # pd_op.reshape: (-1x4x16x144x144xf32) <- (-1x16x144x144xf32, 5xi64) + reshape_87 = paddle._C_ops.reshape(add_40, stack_38) + del add_40, stack_38 + + # pd_op.unsqueeze: (4x1x144x144xf32) <- (4x144x144xf32, 1xi64) + unsqueeze_16 = paddle._C_ops.unsqueeze(where_5, full_int_array_1) + del where_5 + + # pd_op.unsqueeze: (1x4x1x144x144xf32) <- (4x1x144x144xf32, 1xi64) + unsqueeze_17 = paddle._C_ops.unsqueeze(unsqueeze_16, full_int_array_0) + del unsqueeze_16 + + # pd_op.add: (-1x4x16x144x144xf32) <- (-1x4x16x144x144xf32, 1x4x1x144x144xf32) + add_41 = paddle._C_ops.add(reshape_87, unsqueeze_17) + del reshape_87, unsqueeze_17 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_41 = [slice_43, full_26, full_4, full_4] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_39 = paddle._C_ops.stack(combine_41, 0) + del combine_41 + + # pd_op.reshape: (-1x16x144x144xf32) <- (-1x4x16x144x144xf32, 4xi64) + reshape_88 = paddle._C_ops.reshape(add_41, stack_39) + del add_41, stack_39 + + # pd_op.softmax: (-1x16x144x144xf32) <- (-1x16x144x144xf32) + softmax_5 = paddle._C_ops.softmax(reshape_88, -1) + del reshape_88 + + # pd_op.matmul: (-1x16x144x32xf32) <- (-1x16x144x144xf32, -1x16x144x32xf32) + matmul_34 = paddle._C_ops.matmul(softmax_5, slice_46, False, False) + del slice_46, softmax_5 + + # pd_op.transpose: (-1x144x16x32xf32) <- (-1x16x144x32xf32) + transpose_38 = paddle._C_ops.transpose(matmul_34, [0, 2, 1, 3]) + del matmul_34 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_42 = [slice_43, full_4, full_18] + del slice_43 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_40 = paddle._C_ops.stack(combine_42, 0) + del combine_42 + + # pd_op.reshape: (-1x144x512xf32) <- (-1x144x16x32xf32, 3xi64) + reshape_89 = paddle._C_ops.reshape(transpose_38, stack_40) + del stack_40, transpose_38 + + # pd_op.matmul: (-1x144x512xf32) <- (-1x144x512xf32, 512x512xf32) + matmul_35 = paddle._C_ops.matmul(reshape_89, parameter_230, False, False) + del parameter_230, reshape_89 + + # pd_op.add: (-1x144x512xf32) <- (-1x144x512xf32, 512xf32) + add_42 = paddle._C_ops.add(matmul_35, parameter_229) + del matmul_35, parameter_229 + + # pd_op.reshape: (-1x12x12x512xf32) <- (-1x144x512xf32, 4xi64) + reshape_90 = paddle._C_ops.reshape(add_42, full_int_array_38) + del add_42 + + # pd_op.reshape: (-1x2x2x12x12x512xf32) <- (-1x12x12x512xf32, 6xi64) + reshape_91 = paddle._C_ops.reshape(reshape_90, full_int_array_40) + del reshape_90 + + # pd_op.transpose: (-1x2x12x2x12x512xf32) <- (-1x2x2x12x12x512xf32) + transpose_39 = paddle._C_ops.transpose(reshape_91, [0, 1, 3, 2, 4, 5]) + del reshape_91 + + # pd_op.reshape: (-1x24x24x512xf32) <- (-1x2x12x2x12x512xf32, 4xi64) + reshape_92 = paddle._C_ops.reshape(transpose_39, full_int_array_41) + del transpose_39 + + # pd_op.roll: (-1x24x24x512xf32) <- (-1x24x24x512xf32, 2xi64) + roll_5 = paddle._C_ops.roll(reshape_92, full_int_array_29, [1, 2]) + del reshape_92 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_43 = [slice_40, full_30, full_18] + del slice_40 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_41 = paddle._C_ops.stack(combine_43, 0) + del combine_43 + + # pd_op.reshape: (-1x576x512xf32) <- (-1x24x24x512xf32, 3xi64) + reshape_93 = paddle._C_ops.reshape(roll_5, stack_41) + del roll_5, stack_41 + + # pd_op.add: (-1x576x512xf32) <- (-1x576x512xf32, -1x576x512xf32) + add_43 = paddle._C_ops.add(add_38, reshape_93) + del add_38, reshape_93 + + # pd_op.layer_norm: (-1x576x512xf32, -1x576xf32, -1x576xf32) <- (-1x576x512xf32, 512xf32, 512xf32) + layer_norm_42, layer_norm_43, layer_norm_44 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_43, parameter_228, parameter_227, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_227, parameter_228 + + # pd_op.matmul: (-1x576x2048xf32) <- (-1x576x512xf32, 512x2048xf32) + matmul_36 = paddle._C_ops.matmul(layer_norm_42, parameter_226, False, False) + del layer_norm_42, parameter_226 + + # pd_op.add: (-1x576x2048xf32) <- (-1x576x2048xf32, 2048xf32) + add_44 = paddle._C_ops.add(matmul_36, parameter_225) + del matmul_36, parameter_225 + + # pd_op.gelu: (-1x576x2048xf32) <- (-1x576x2048xf32) + gelu_5 = paddle._C_ops.gelu(add_44, False) + del add_44 + + # pd_op.matmul: (-1x576x512xf32) <- (-1x576x2048xf32, 2048x512xf32) + matmul_37 = paddle._C_ops.matmul(gelu_5, parameter_224, False, False) + del gelu_5, parameter_224 + + # pd_op.add: (-1x576x512xf32) <- (-1x576x512xf32, 512xf32) + add_45 = paddle._C_ops.add(matmul_37, parameter_223) + del matmul_37, parameter_223 + + # pd_op.add: (-1x576x512xf32) <- (-1x576x512xf32, -1x576x512xf32) + add_46 = paddle._C_ops.add(add_43, add_45) + del add_43, add_45 + + # pd_op.shape64: (3xi64) <- (-1x576x512xf32) + shape64_29 = paddle._C_ops.shape64(add_46) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_47 = paddle._C_ops.slice( + shape64_29, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_29 + + # pd_op.layer_norm: (-1x576x512xf32, -1x576xf32, -1x576xf32) <- (-1x576x512xf32, 512xf32, 512xf32) + layer_norm_45, layer_norm_46, layer_norm_47 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_46, parameter_222, parameter_221, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_221, parameter_222 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_44 = [slice_47, full_28, full_28, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_42 = paddle._C_ops.stack(combine_44, 0) + del combine_44 + + # pd_op.reshape: (-1x24x24x512xf32) <- (-1x576x512xf32, 4xi64) + reshape_94 = paddle._C_ops.reshape(layer_norm_45, stack_42) + del layer_norm_45, stack_42 + + # pd_op.shape64: (4xi64) <- (-1x24x24x512xf32) + shape64_30 = paddle._C_ops.shape64(reshape_94) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_48 = paddle._C_ops.slice( + shape64_30, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_30 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_45 = [slice_48, full_29, full_3, full_29, full_3, full_18] + del slice_48 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_43 = paddle._C_ops.stack(combine_45, 0) + del combine_45 + + # pd_op.reshape: (-1x2x12x2x12x512xf32) <- (-1x24x24x512xf32, 6xi64) + reshape_95 = paddle._C_ops.reshape(reshape_94, stack_43) + del reshape_94, stack_43 + + # pd_op.transpose: (-1x2x2x12x12x512xf32) <- (-1x2x12x2x12x512xf32) + transpose_40 = paddle._C_ops.transpose(reshape_95, [0, 1, 3, 2, 4, 5]) + del reshape_95 + + # pd_op.reshape: (-1x12x12x512xf32) <- (-1x2x2x12x12x512xf32, 4xi64) + reshape_96 = paddle._C_ops.reshape(transpose_40, full_int_array_38) + del transpose_40 + + # pd_op.reshape: (-1x144x512xf32) <- (-1x12x12x512xf32, 3xi64) + reshape_97 = paddle._C_ops.reshape(reshape_96, full_int_array_39) + del reshape_96 + + # pd_op.shape64: (3xi64) <- (-1x144x512xf32) + shape64_31 = paddle._C_ops.shape64(reshape_97) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_49 = paddle._C_ops.slice( + shape64_31, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_31 + + # pd_op.matmul: (-1x144x1536xf32) <- (-1x144x512xf32, 512x1536xf32) + matmul_38 = paddle._C_ops.matmul(reshape_97, parameter_220, False, False) + del parameter_220, reshape_97 + + # pd_op.add: (-1x144x1536xf32) <- (-1x144x1536xf32, 1536xf32) + add_47 = paddle._C_ops.add(matmul_38, parameter_219) + del matmul_38, parameter_219 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_46 = [slice_49, full_4, full_5, full_26, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_44 = paddle._C_ops.stack(combine_46, 0) + del combine_46 + + # pd_op.reshape: (-1x144x3x16x32xf32) <- (-1x144x1536xf32, 5xi64) + reshape_98 = paddle._C_ops.reshape(add_47, stack_44) + del add_47, stack_44 + + # pd_op.transpose: (3x-1x16x144x32xf32) <- (-1x144x3x16x32xf32) + transpose_41 = paddle._C_ops.transpose(reshape_98, [2, 0, 3, 1, 4]) + del reshape_98 + + # pd_op.slice: (-1x16x144x32xf32) <- (3x-1x16x144x32xf32, 1xi64, 1xi64) + slice_50 = paddle._C_ops.slice( + transpose_41, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x16x144x32xf32) <- (3x-1x16x144x32xf32, 1xi64, 1xi64) + slice_51 = paddle._C_ops.slice( + transpose_41, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x16x144x32xf32) <- (3x-1x16x144x32xf32, 1xi64, 1xi64) + slice_52 = paddle._C_ops.slice( + transpose_41, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_41 + + # pd_op.scale: (-1x16x144x32xf32) <- (-1x16x144x32xf32, 1xf32) + scale_6 = paddle._C_ops.scale(slice_50, full_8, float("0"), True) + del slice_50 + + # pd_op.transpose: (-1x16x32x144xf32) <- (-1x16x144x32xf32) + transpose_42 = paddle._C_ops.transpose(slice_51, [0, 1, 3, 2]) + del slice_51 + + # pd_op.matmul: (-1x16x144x144xf32) <- (-1x16x144x32xf32, -1x16x32x144xf32) + matmul_39 = paddle._C_ops.matmul(scale_6, transpose_42, False, False) + del scale_6, transpose_42 + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_99 = paddle._C_ops.reshape(data_13, full_int_array_7) + del data_13 + + # pd_op.index_select: (20736x16xf32) <- (529x16xf32, 20736xi64) + index_select_6 = paddle._C_ops.index_select(data_14, reshape_99, 0) + del data_14, reshape_99 + + # pd_op.reshape: (144x144x16xf32) <- (20736x16xf32, 3xi64) + reshape_100 = paddle._C_ops.reshape(index_select_6, full_int_array_8) + del index_select_6 + + # pd_op.transpose: (16x144x144xf32) <- (144x144x16xf32) + transpose_43 = paddle._C_ops.transpose(reshape_100, [2, 0, 1]) + del reshape_100 + + # pd_op.unsqueeze: (1x16x144x144xf32) <- (16x144x144xf32, 1xi64) + unsqueeze_18 = paddle._C_ops.unsqueeze(transpose_43, full_int_array_0) + del transpose_43 + + # pd_op.add: (-1x16x144x144xf32) <- (-1x16x144x144xf32, 1x16x144x144xf32) + add_48 = paddle._C_ops.add(matmul_39, unsqueeze_18) + del matmul_39, unsqueeze_18 + + # pd_op.softmax: (-1x16x144x144xf32) <- (-1x16x144x144xf32) + softmax_6 = paddle._C_ops.softmax(add_48, -1) + del add_48 + + # pd_op.matmul: (-1x16x144x32xf32) <- (-1x16x144x144xf32, -1x16x144x32xf32) + matmul_40 = paddle._C_ops.matmul(softmax_6, slice_52, False, False) + del slice_52, softmax_6 + + # pd_op.transpose: (-1x144x16x32xf32) <- (-1x16x144x32xf32) + transpose_44 = paddle._C_ops.transpose(matmul_40, [0, 2, 1, 3]) + del matmul_40 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_47 = [slice_49, full_4, full_18] + del slice_49 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_45 = paddle._C_ops.stack(combine_47, 0) + del combine_47 + + # pd_op.reshape: (-1x144x512xf32) <- (-1x144x16x32xf32, 3xi64) + reshape_101 = paddle._C_ops.reshape(transpose_44, stack_45) + del stack_45, transpose_44 + + # pd_op.matmul: (-1x144x512xf32) <- (-1x144x512xf32, 512x512xf32) + matmul_41 = paddle._C_ops.matmul(reshape_101, parameter_218, False, False) + del parameter_218, reshape_101 + + # pd_op.add: (-1x144x512xf32) <- (-1x144x512xf32, 512xf32) + add_49 = paddle._C_ops.add(matmul_41, parameter_217) + del matmul_41, parameter_217 + + # pd_op.reshape: (-1x12x12x512xf32) <- (-1x144x512xf32, 4xi64) + reshape_102 = paddle._C_ops.reshape(add_49, full_int_array_38) + del add_49 + + # pd_op.reshape: (-1x2x2x12x12x512xf32) <- (-1x12x12x512xf32, 6xi64) + reshape_103 = paddle._C_ops.reshape(reshape_102, full_int_array_40) + del reshape_102 + + # pd_op.transpose: (-1x2x12x2x12x512xf32) <- (-1x2x2x12x12x512xf32) + transpose_45 = paddle._C_ops.transpose(reshape_103, [0, 1, 3, 2, 4, 5]) + del reshape_103 + + # pd_op.reshape: (-1x24x24x512xf32) <- (-1x2x12x2x12x512xf32, 4xi64) + reshape_104 = paddle._C_ops.reshape(transpose_45, full_int_array_41) + del transpose_45 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_48 = [slice_47, full_30, full_18] + del slice_47 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_46 = paddle._C_ops.stack(combine_48, 0) + del combine_48 + + # pd_op.reshape: (-1x576x512xf32) <- (-1x24x24x512xf32, 3xi64) + reshape_105 = paddle._C_ops.reshape(reshape_104, stack_46) + del reshape_104, stack_46 + + # pd_op.add: (-1x576x512xf32) <- (-1x576x512xf32, -1x576x512xf32) + add_50 = paddle._C_ops.add(add_46, reshape_105) + del add_46, reshape_105 + + # pd_op.layer_norm: (-1x576x512xf32, -1x576xf32, -1x576xf32) <- (-1x576x512xf32, 512xf32, 512xf32) + layer_norm_48, layer_norm_49, layer_norm_50 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_50, parameter_216, parameter_215, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_215, parameter_216 + + # pd_op.matmul: (-1x576x2048xf32) <- (-1x576x512xf32, 512x2048xf32) + matmul_42 = paddle._C_ops.matmul(layer_norm_48, parameter_214, False, False) + del layer_norm_48, parameter_214 + + # pd_op.add: (-1x576x2048xf32) <- (-1x576x2048xf32, 2048xf32) + add_51 = paddle._C_ops.add(matmul_42, parameter_213) + del matmul_42, parameter_213 + + # pd_op.gelu: (-1x576x2048xf32) <- (-1x576x2048xf32) + gelu_6 = paddle._C_ops.gelu(add_51, False) + del add_51 + + # pd_op.matmul: (-1x576x512xf32) <- (-1x576x2048xf32, 2048x512xf32) + matmul_43 = paddle._C_ops.matmul(gelu_6, parameter_212, False, False) + del gelu_6, parameter_212 + + # pd_op.add: (-1x576x512xf32) <- (-1x576x512xf32, 512xf32) + add_52 = paddle._C_ops.add(matmul_43, parameter_211) + del matmul_43, parameter_211 + + # pd_op.add: (-1x576x512xf32) <- (-1x576x512xf32, -1x576x512xf32) + add_53 = paddle._C_ops.add(add_50, add_52) + del add_50, add_52 + + # pd_op.shape64: (3xi64) <- (-1x576x512xf32) + shape64_32 = paddle._C_ops.shape64(add_53) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_53 = paddle._C_ops.slice( + shape64_32, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_32 + + # pd_op.layer_norm: (-1x576x512xf32, -1x576xf32, -1x576xf32) <- (-1x576x512xf32, 512xf32, 512xf32) + layer_norm_51, layer_norm_52, layer_norm_53 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_53, parameter_210, parameter_209, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_209, parameter_210 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_49 = [slice_53, full_28, full_28, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_47 = paddle._C_ops.stack(combine_49, 0) + del combine_49 + + # pd_op.reshape: (-1x24x24x512xf32) <- (-1x576x512xf32, 4xi64) + reshape_106 = paddle._C_ops.reshape(layer_norm_51, stack_47) + del layer_norm_51, stack_47 + + # pd_op.shape64: (4xi64) <- (-1x24x24x512xf32) + shape64_33 = paddle._C_ops.shape64(reshape_106) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_54 = paddle._C_ops.slice( + shape64_33, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_33 + + # pd_op.roll: (-1x24x24x512xf32) <- (-1x24x24x512xf32, 2xi64) + roll_6 = paddle._C_ops.roll(reshape_106, full_int_array_11, [1, 2]) + del reshape_106 + + # pd_op.shape64: (4xi64) <- (-1x24x24x512xf32) + shape64_34 = paddle._C_ops.shape64(roll_6) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_55 = paddle._C_ops.slice( + shape64_34, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_34 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_50 = [slice_55, full_29, full_3, full_29, full_3, full_18] + del slice_55 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_48 = paddle._C_ops.stack(combine_50, 0) + del combine_50 + + # pd_op.reshape: (-1x2x12x2x12x512xf32) <- (-1x24x24x512xf32, 6xi64) + reshape_107 = paddle._C_ops.reshape(roll_6, stack_48) + del roll_6, stack_48 + + # pd_op.transpose: (-1x2x2x12x12x512xf32) <- (-1x2x12x2x12x512xf32) + transpose_46 = paddle._C_ops.transpose(reshape_107, [0, 1, 3, 2, 4, 5]) + del reshape_107 + + # pd_op.reshape: (-1x12x12x512xf32) <- (-1x2x2x12x12x512xf32, 4xi64) + reshape_108 = paddle._C_ops.reshape(transpose_46, full_int_array_38) + del transpose_46 + + # pd_op.reshape: (-1x144x512xf32) <- (-1x12x12x512xf32, 3xi64) + reshape_109 = paddle._C_ops.reshape(reshape_108, full_int_array_39) + del reshape_108 + + # pd_op.full: (1x24x24x1xf32) <- () + full_35 = paddle._C_ops.full( + [1, 24, 24, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__36 = paddle._C_ops.set_value_( + full_35, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_35 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__37 = paddle._C_ops.set_value_( + set_value__36, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__36 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__38 = paddle._C_ops.set_value_( + set_value__37, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__37 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__39 = paddle._C_ops.set_value_( + set_value__38, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__38 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__40 = paddle._C_ops.set_value_( + set_value__39, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__39 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__41 = paddle._C_ops.set_value_( + set_value__40, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__40 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__42 = paddle._C_ops.set_value_( + set_value__41, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__41 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__43 = paddle._C_ops.set_value_( + set_value__42, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__42 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__3 = paddle._C_ops.set_value_( + set_value__43, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__43 + + # pd_op.reshape: (1x2x12x2x12x1xf32) <- (1x24x24x1xf32, 6xi64) + reshape_110 = paddle._C_ops.reshape(set_value__3, full_int_array_42) + + # pd_op.transpose: (1x2x2x12x12x1xf32) <- (1x2x12x2x12x1xf32) + transpose_47 = paddle._C_ops.transpose(reshape_110, [0, 1, 3, 2, 4, 5]) + del reshape_110 + + # pd_op.reshape: (4x12x12x1xf32) <- (1x2x2x12x12x1xf32, 4xi64) + reshape_111 = paddle._C_ops.reshape(transpose_47, full_int_array_27) + del transpose_47 + + # pd_op.reshape: (4x144xf32) <- (4x12x12x1xf32, 2xi64) + reshape_112 = paddle._C_ops.reshape(reshape_111, full_int_array_28) + del reshape_111 + + # pd_op.unsqueeze: (4x1x144xf32) <- (4x144xf32, 1xi64) + unsqueeze_19 = paddle._C_ops.unsqueeze(reshape_112, full_int_array_1) + + # pd_op.unsqueeze: (4x144x1xf32) <- (4x144xf32, 1xi64) + unsqueeze_20 = paddle._C_ops.unsqueeze(reshape_112, full_int_array_5) + del reshape_112 + + # pd_op.subtract: (4x144x144xf32) <- (4x1x144xf32, 4x144x1xf32) + subtract_3 = paddle._C_ops.subtract(unsqueeze_19, unsqueeze_20) + del unsqueeze_19, unsqueeze_20 + + # pd_op.not_equal: (4x144x144xb) <- (4x144x144xf32, xf32) + not_equal_3 = paddle._C_ops.not_equal(subtract_3, full_11) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_6 = paddle._C_ops.where(not_equal_3, full_32, subtract_3) + del not_equal_3, subtract_3 + + # pd_op.equal: (4x144x144xb) <- (4x144x144xf32, xf32) + equal_3 = paddle._C_ops.equal(where_6, full_11) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_7 = paddle._C_ops.where(equal_3, full_33, where_6) + del equal_3, where_6 + + # pd_op.shape64: (3xi64) <- (-1x144x512xf32) + shape64_35 = paddle._C_ops.shape64(reshape_109) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_56 = paddle._C_ops.slice( + shape64_35, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_35 + + # pd_op.matmul: (-1x144x1536xf32) <- (-1x144x512xf32, 512x1536xf32) + matmul_44 = paddle._C_ops.matmul(reshape_109, parameter_208, False, False) + del parameter_208, reshape_109 + + # pd_op.add: (-1x144x1536xf32) <- (-1x144x1536xf32, 1536xf32) + add_54 = paddle._C_ops.add(matmul_44, parameter_207) + del matmul_44, parameter_207 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_51 = [slice_56, full_4, full_5, full_26, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_49 = paddle._C_ops.stack(combine_51, 0) + del combine_51 + + # pd_op.reshape: (-1x144x3x16x32xf32) <- (-1x144x1536xf32, 5xi64) + reshape_113 = paddle._C_ops.reshape(add_54, stack_49) + del add_54, stack_49 + + # pd_op.transpose: (3x-1x16x144x32xf32) <- (-1x144x3x16x32xf32) + transpose_48 = paddle._C_ops.transpose(reshape_113, [2, 0, 3, 1, 4]) + del reshape_113 + + # pd_op.slice: (-1x16x144x32xf32) <- (3x-1x16x144x32xf32, 1xi64, 1xi64) + slice_57 = paddle._C_ops.slice( + transpose_48, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x16x144x32xf32) <- (3x-1x16x144x32xf32, 1xi64, 1xi64) + slice_58 = paddle._C_ops.slice( + transpose_48, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x16x144x32xf32) <- (3x-1x16x144x32xf32, 1xi64, 1xi64) + slice_59 = paddle._C_ops.slice( + transpose_48, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_48 + + # pd_op.scale: (-1x16x144x32xf32) <- (-1x16x144x32xf32, 1xf32) + scale_7 = paddle._C_ops.scale(slice_57, full_8, float("0"), True) + del slice_57 + + # pd_op.transpose: (-1x16x32x144xf32) <- (-1x16x144x32xf32) + transpose_49 = paddle._C_ops.transpose(slice_58, [0, 1, 3, 2]) + del slice_58 + + # pd_op.matmul: (-1x16x144x144xf32) <- (-1x16x144x32xf32, -1x16x32x144xf32) + matmul_45 = paddle._C_ops.matmul(scale_7, transpose_49, False, False) + del scale_7, transpose_49 + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_114 = paddle._C_ops.reshape(data_15, full_int_array_7) + del data_15 + + # pd_op.index_select: (20736x16xf32) <- (529x16xf32, 20736xi64) + index_select_7 = paddle._C_ops.index_select(data_16, reshape_114, 0) + del data_16, reshape_114 + + # pd_op.reshape: (144x144x16xf32) <- (20736x16xf32, 3xi64) + reshape_115 = paddle._C_ops.reshape(index_select_7, full_int_array_8) + del index_select_7 + + # pd_op.transpose: (16x144x144xf32) <- (144x144x16xf32) + transpose_50 = paddle._C_ops.transpose(reshape_115, [2, 0, 1]) + del reshape_115 + + # pd_op.unsqueeze: (1x16x144x144xf32) <- (16x144x144xf32, 1xi64) + unsqueeze_21 = paddle._C_ops.unsqueeze(transpose_50, full_int_array_0) + del transpose_50 + + # pd_op.add: (-1x16x144x144xf32) <- (-1x16x144x144xf32, 1x16x144x144xf32) + add_55 = paddle._C_ops.add(matmul_45, unsqueeze_21) + del matmul_45, unsqueeze_21 + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_3 = paddle._C_ops.floor_divide(slice_56, full_34) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_52 = [floor_divide_3, full_6, full_26, full_4, full_4] + del floor_divide_3 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_50 = paddle._C_ops.stack(combine_52, 0) + del combine_52 + + # pd_op.reshape: (-1x4x16x144x144xf32) <- (-1x16x144x144xf32, 5xi64) + reshape_116 = paddle._C_ops.reshape(add_55, stack_50) + del add_55, stack_50 + + # pd_op.unsqueeze: (4x1x144x144xf32) <- (4x144x144xf32, 1xi64) + unsqueeze_22 = paddle._C_ops.unsqueeze(where_7, full_int_array_1) + del where_7 + + # pd_op.unsqueeze: (1x4x1x144x144xf32) <- (4x1x144x144xf32, 1xi64) + unsqueeze_23 = paddle._C_ops.unsqueeze(unsqueeze_22, full_int_array_0) + del unsqueeze_22 + + # pd_op.add: (-1x4x16x144x144xf32) <- (-1x4x16x144x144xf32, 1x4x1x144x144xf32) + add_56 = paddle._C_ops.add(reshape_116, unsqueeze_23) + del reshape_116, unsqueeze_23 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_53 = [slice_56, full_26, full_4, full_4] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_51 = paddle._C_ops.stack(combine_53, 0) + del combine_53 + + # pd_op.reshape: (-1x16x144x144xf32) <- (-1x4x16x144x144xf32, 4xi64) + reshape_117 = paddle._C_ops.reshape(add_56, stack_51) + del add_56, stack_51 + + # pd_op.softmax: (-1x16x144x144xf32) <- (-1x16x144x144xf32) + softmax_7 = paddle._C_ops.softmax(reshape_117, -1) + del reshape_117 + + # pd_op.matmul: (-1x16x144x32xf32) <- (-1x16x144x144xf32, -1x16x144x32xf32) + matmul_46 = paddle._C_ops.matmul(softmax_7, slice_59, False, False) + del slice_59, softmax_7 + + # pd_op.transpose: (-1x144x16x32xf32) <- (-1x16x144x32xf32) + transpose_51 = paddle._C_ops.transpose(matmul_46, [0, 2, 1, 3]) + del matmul_46 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_54 = [slice_56, full_4, full_18] + del slice_56 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_52 = paddle._C_ops.stack(combine_54, 0) + del combine_54 + + # pd_op.reshape: (-1x144x512xf32) <- (-1x144x16x32xf32, 3xi64) + reshape_118 = paddle._C_ops.reshape(transpose_51, stack_52) + del stack_52, transpose_51 + + # pd_op.matmul: (-1x144x512xf32) <- (-1x144x512xf32, 512x512xf32) + matmul_47 = paddle._C_ops.matmul(reshape_118, parameter_206, False, False) + del parameter_206, reshape_118 + + # pd_op.add: (-1x144x512xf32) <- (-1x144x512xf32, 512xf32) + add_57 = paddle._C_ops.add(matmul_47, parameter_205) + del matmul_47, parameter_205 + + # pd_op.reshape: (-1x12x12x512xf32) <- (-1x144x512xf32, 4xi64) + reshape_119 = paddle._C_ops.reshape(add_57, full_int_array_38) + del add_57 + + # pd_op.reshape: (-1x2x2x12x12x512xf32) <- (-1x12x12x512xf32, 6xi64) + reshape_120 = paddle._C_ops.reshape(reshape_119, full_int_array_40) + del reshape_119 + + # pd_op.transpose: (-1x2x12x2x12x512xf32) <- (-1x2x2x12x12x512xf32) + transpose_52 = paddle._C_ops.transpose(reshape_120, [0, 1, 3, 2, 4, 5]) + del reshape_120 + + # pd_op.reshape: (-1x24x24x512xf32) <- (-1x2x12x2x12x512xf32, 4xi64) + reshape_121 = paddle._C_ops.reshape(transpose_52, full_int_array_41) + del transpose_52 + + # pd_op.roll: (-1x24x24x512xf32) <- (-1x24x24x512xf32, 2xi64) + roll_7 = paddle._C_ops.roll(reshape_121, full_int_array_29, [1, 2]) + del reshape_121 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_55 = [slice_53, full_30, full_18] + del slice_53 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_53 = paddle._C_ops.stack(combine_55, 0) + del combine_55 + + # pd_op.reshape: (-1x576x512xf32) <- (-1x24x24x512xf32, 3xi64) + reshape_122 = paddle._C_ops.reshape(roll_7, stack_53) + del roll_7, stack_53 + + # pd_op.add: (-1x576x512xf32) <- (-1x576x512xf32, -1x576x512xf32) + add_58 = paddle._C_ops.add(add_53, reshape_122) + del add_53, reshape_122 + + # pd_op.layer_norm: (-1x576x512xf32, -1x576xf32, -1x576xf32) <- (-1x576x512xf32, 512xf32, 512xf32) + layer_norm_54, layer_norm_55, layer_norm_56 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_58, parameter_204, parameter_203, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_203, parameter_204 + + # pd_op.matmul: (-1x576x2048xf32) <- (-1x576x512xf32, 512x2048xf32) + matmul_48 = paddle._C_ops.matmul(layer_norm_54, parameter_202, False, False) + del layer_norm_54, parameter_202 + + # pd_op.add: (-1x576x2048xf32) <- (-1x576x2048xf32, 2048xf32) + add_59 = paddle._C_ops.add(matmul_48, parameter_201) + del matmul_48, parameter_201 + + # pd_op.gelu: (-1x576x2048xf32) <- (-1x576x2048xf32) + gelu_7 = paddle._C_ops.gelu(add_59, False) + del add_59 + + # pd_op.matmul: (-1x576x512xf32) <- (-1x576x2048xf32, 2048x512xf32) + matmul_49 = paddle._C_ops.matmul(gelu_7, parameter_200, False, False) + del gelu_7, parameter_200 + + # pd_op.add: (-1x576x512xf32) <- (-1x576x512xf32, 512xf32) + add_60 = paddle._C_ops.add(matmul_49, parameter_199) + del matmul_49, parameter_199 + + # pd_op.add: (-1x576x512xf32) <- (-1x576x512xf32, -1x576x512xf32) + add_61 = paddle._C_ops.add(add_58, add_60) + del add_58, add_60 + + # pd_op.shape64: (3xi64) <- (-1x576x512xf32) + shape64_36 = paddle._C_ops.shape64(add_61) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_60 = paddle._C_ops.slice( + shape64_36, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_36 + + # pd_op.layer_norm: (-1x576x512xf32, -1x576xf32, -1x576xf32) <- (-1x576x512xf32, 512xf32, 512xf32) + layer_norm_57, layer_norm_58, layer_norm_59 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_61, parameter_198, parameter_197, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_197, parameter_198 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_56 = [slice_60, full_28, full_28, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_54 = paddle._C_ops.stack(combine_56, 0) + del combine_56 + + # pd_op.reshape: (-1x24x24x512xf32) <- (-1x576x512xf32, 4xi64) + reshape_123 = paddle._C_ops.reshape(layer_norm_57, stack_54) + del layer_norm_57, stack_54 + + # pd_op.shape64: (4xi64) <- (-1x24x24x512xf32) + shape64_37 = paddle._C_ops.shape64(reshape_123) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_61 = paddle._C_ops.slice( + shape64_37, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_37 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_57 = [slice_61, full_29, full_3, full_29, full_3, full_18] + del slice_61 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_55 = paddle._C_ops.stack(combine_57, 0) + del combine_57 + + # pd_op.reshape: (-1x2x12x2x12x512xf32) <- (-1x24x24x512xf32, 6xi64) + reshape_124 = paddle._C_ops.reshape(reshape_123, stack_55) + del reshape_123, stack_55 + + # pd_op.transpose: (-1x2x2x12x12x512xf32) <- (-1x2x12x2x12x512xf32) + transpose_53 = paddle._C_ops.transpose(reshape_124, [0, 1, 3, 2, 4, 5]) + del reshape_124 + + # pd_op.reshape: (-1x12x12x512xf32) <- (-1x2x2x12x12x512xf32, 4xi64) + reshape_125 = paddle._C_ops.reshape(transpose_53, full_int_array_38) + del transpose_53 + + # pd_op.reshape: (-1x144x512xf32) <- (-1x12x12x512xf32, 3xi64) + reshape_126 = paddle._C_ops.reshape(reshape_125, full_int_array_39) + del reshape_125 + + # pd_op.shape64: (3xi64) <- (-1x144x512xf32) + shape64_38 = paddle._C_ops.shape64(reshape_126) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_62 = paddle._C_ops.slice( + shape64_38, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_38 + + # pd_op.matmul: (-1x144x1536xf32) <- (-1x144x512xf32, 512x1536xf32) + matmul_50 = paddle._C_ops.matmul(reshape_126, parameter_196, False, False) + del parameter_196, reshape_126 + + # pd_op.add: (-1x144x1536xf32) <- (-1x144x1536xf32, 1536xf32) + add_62 = paddle._C_ops.add(matmul_50, parameter_195) + del matmul_50, parameter_195 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_58 = [slice_62, full_4, full_5, full_26, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_56 = paddle._C_ops.stack(combine_58, 0) + del combine_58 + + # pd_op.reshape: (-1x144x3x16x32xf32) <- (-1x144x1536xf32, 5xi64) + reshape_127 = paddle._C_ops.reshape(add_62, stack_56) + del add_62, stack_56 + + # pd_op.transpose: (3x-1x16x144x32xf32) <- (-1x144x3x16x32xf32) + transpose_54 = paddle._C_ops.transpose(reshape_127, [2, 0, 3, 1, 4]) + del reshape_127 + + # pd_op.slice: (-1x16x144x32xf32) <- (3x-1x16x144x32xf32, 1xi64, 1xi64) + slice_63 = paddle._C_ops.slice( + transpose_54, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x16x144x32xf32) <- (3x-1x16x144x32xf32, 1xi64, 1xi64) + slice_64 = paddle._C_ops.slice( + transpose_54, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x16x144x32xf32) <- (3x-1x16x144x32xf32, 1xi64, 1xi64) + slice_65 = paddle._C_ops.slice( + transpose_54, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_54 + + # pd_op.scale: (-1x16x144x32xf32) <- (-1x16x144x32xf32, 1xf32) + scale_8 = paddle._C_ops.scale(slice_63, full_8, float("0"), True) + del slice_63 + + # pd_op.transpose: (-1x16x32x144xf32) <- (-1x16x144x32xf32) + transpose_55 = paddle._C_ops.transpose(slice_64, [0, 1, 3, 2]) + del slice_64 + + # pd_op.matmul: (-1x16x144x144xf32) <- (-1x16x144x32xf32, -1x16x32x144xf32) + matmul_51 = paddle._C_ops.matmul(scale_8, transpose_55, False, False) + del scale_8, transpose_55 + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_128 = paddle._C_ops.reshape(data_17, full_int_array_7) + del data_17 + + # pd_op.index_select: (20736x16xf32) <- (529x16xf32, 20736xi64) + index_select_8 = paddle._C_ops.index_select(data_18, reshape_128, 0) + del data_18, reshape_128 + + # pd_op.reshape: (144x144x16xf32) <- (20736x16xf32, 3xi64) + reshape_129 = paddle._C_ops.reshape(index_select_8, full_int_array_8) + del index_select_8 + + # pd_op.transpose: (16x144x144xf32) <- (144x144x16xf32) + transpose_56 = paddle._C_ops.transpose(reshape_129, [2, 0, 1]) + del reshape_129 + + # pd_op.unsqueeze: (1x16x144x144xf32) <- (16x144x144xf32, 1xi64) + unsqueeze_24 = paddle._C_ops.unsqueeze(transpose_56, full_int_array_0) + del transpose_56 + + # pd_op.add: (-1x16x144x144xf32) <- (-1x16x144x144xf32, 1x16x144x144xf32) + add_63 = paddle._C_ops.add(matmul_51, unsqueeze_24) + del matmul_51, unsqueeze_24 + + # pd_op.softmax: (-1x16x144x144xf32) <- (-1x16x144x144xf32) + softmax_8 = paddle._C_ops.softmax(add_63, -1) + del add_63 + + # pd_op.matmul: (-1x16x144x32xf32) <- (-1x16x144x144xf32, -1x16x144x32xf32) + matmul_52 = paddle._C_ops.matmul(softmax_8, slice_65, False, False) + del slice_65, softmax_8 + + # pd_op.transpose: (-1x144x16x32xf32) <- (-1x16x144x32xf32) + transpose_57 = paddle._C_ops.transpose(matmul_52, [0, 2, 1, 3]) + del matmul_52 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_59 = [slice_62, full_4, full_18] + del slice_62 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_57 = paddle._C_ops.stack(combine_59, 0) + del combine_59 + + # pd_op.reshape: (-1x144x512xf32) <- (-1x144x16x32xf32, 3xi64) + reshape_130 = paddle._C_ops.reshape(transpose_57, stack_57) + del stack_57, transpose_57 + + # pd_op.matmul: (-1x144x512xf32) <- (-1x144x512xf32, 512x512xf32) + matmul_53 = paddle._C_ops.matmul(reshape_130, parameter_194, False, False) + del parameter_194, reshape_130 + + # pd_op.add: (-1x144x512xf32) <- (-1x144x512xf32, 512xf32) + add_64 = paddle._C_ops.add(matmul_53, parameter_193) + del matmul_53, parameter_193 + + # pd_op.reshape: (-1x12x12x512xf32) <- (-1x144x512xf32, 4xi64) + reshape_131 = paddle._C_ops.reshape(add_64, full_int_array_38) + del add_64 + + # pd_op.reshape: (-1x2x2x12x12x512xf32) <- (-1x12x12x512xf32, 6xi64) + reshape_132 = paddle._C_ops.reshape(reshape_131, full_int_array_40) + del reshape_131 + + # pd_op.transpose: (-1x2x12x2x12x512xf32) <- (-1x2x2x12x12x512xf32) + transpose_58 = paddle._C_ops.transpose(reshape_132, [0, 1, 3, 2, 4, 5]) + del reshape_132 + + # pd_op.reshape: (-1x24x24x512xf32) <- (-1x2x12x2x12x512xf32, 4xi64) + reshape_133 = paddle._C_ops.reshape(transpose_58, full_int_array_41) + del transpose_58 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_60 = [slice_60, full_30, full_18] + del slice_60 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_58 = paddle._C_ops.stack(combine_60, 0) + del combine_60 + + # pd_op.reshape: (-1x576x512xf32) <- (-1x24x24x512xf32, 3xi64) + reshape_134 = paddle._C_ops.reshape(reshape_133, stack_58) + del reshape_133, stack_58 + + # pd_op.add: (-1x576x512xf32) <- (-1x576x512xf32, -1x576x512xf32) + add_65 = paddle._C_ops.add(add_61, reshape_134) + del add_61, reshape_134 + + # pd_op.layer_norm: (-1x576x512xf32, -1x576xf32, -1x576xf32) <- (-1x576x512xf32, 512xf32, 512xf32) + layer_norm_60, layer_norm_61, layer_norm_62 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_65, parameter_192, parameter_191, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_191, parameter_192 + + # pd_op.matmul: (-1x576x2048xf32) <- (-1x576x512xf32, 512x2048xf32) + matmul_54 = paddle._C_ops.matmul(layer_norm_60, parameter_190, False, False) + del layer_norm_60, parameter_190 + + # pd_op.add: (-1x576x2048xf32) <- (-1x576x2048xf32, 2048xf32) + add_66 = paddle._C_ops.add(matmul_54, parameter_189) + del matmul_54, parameter_189 + + # pd_op.gelu: (-1x576x2048xf32) <- (-1x576x2048xf32) + gelu_8 = paddle._C_ops.gelu(add_66, False) + del add_66 + + # pd_op.matmul: (-1x576x512xf32) <- (-1x576x2048xf32, 2048x512xf32) + matmul_55 = paddle._C_ops.matmul(gelu_8, parameter_188, False, False) + del gelu_8, parameter_188 + + # pd_op.add: (-1x576x512xf32) <- (-1x576x512xf32, 512xf32) + add_67 = paddle._C_ops.add(matmul_55, parameter_187) + del matmul_55, parameter_187 + + # pd_op.add: (-1x576x512xf32) <- (-1x576x512xf32, -1x576x512xf32) + add_68 = paddle._C_ops.add(add_65, add_67) + del add_65, add_67 + + # pd_op.shape64: (3xi64) <- (-1x576x512xf32) + shape64_39 = paddle._C_ops.shape64(add_68) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_66 = paddle._C_ops.slice( + shape64_39, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_39 + + # pd_op.layer_norm: (-1x576x512xf32, -1x576xf32, -1x576xf32) <- (-1x576x512xf32, 512xf32, 512xf32) + layer_norm_63, layer_norm_64, layer_norm_65 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_68, parameter_186, parameter_185, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_185, parameter_186 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_61 = [slice_66, full_28, full_28, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_59 = paddle._C_ops.stack(combine_61, 0) + del combine_61 + + # pd_op.reshape: (-1x24x24x512xf32) <- (-1x576x512xf32, 4xi64) + reshape_135 = paddle._C_ops.reshape(layer_norm_63, stack_59) + del layer_norm_63, stack_59 + + # pd_op.shape64: (4xi64) <- (-1x24x24x512xf32) + shape64_40 = paddle._C_ops.shape64(reshape_135) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_67 = paddle._C_ops.slice( + shape64_40, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_40 + + # pd_op.roll: (-1x24x24x512xf32) <- (-1x24x24x512xf32, 2xi64) + roll_8 = paddle._C_ops.roll(reshape_135, full_int_array_11, [1, 2]) + del reshape_135 + + # pd_op.shape64: (4xi64) <- (-1x24x24x512xf32) + shape64_41 = paddle._C_ops.shape64(roll_8) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_68 = paddle._C_ops.slice( + shape64_41, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_41 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_62 = [slice_68, full_29, full_3, full_29, full_3, full_18] + del slice_68 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_60 = paddle._C_ops.stack(combine_62, 0) + del combine_62 + + # pd_op.reshape: (-1x2x12x2x12x512xf32) <- (-1x24x24x512xf32, 6xi64) + reshape_136 = paddle._C_ops.reshape(roll_8, stack_60) + del roll_8, stack_60 + + # pd_op.transpose: (-1x2x2x12x12x512xf32) <- (-1x2x12x2x12x512xf32) + transpose_59 = paddle._C_ops.transpose(reshape_136, [0, 1, 3, 2, 4, 5]) + del reshape_136 + + # pd_op.reshape: (-1x12x12x512xf32) <- (-1x2x2x12x12x512xf32, 4xi64) + reshape_137 = paddle._C_ops.reshape(transpose_59, full_int_array_38) + del transpose_59 + + # pd_op.reshape: (-1x144x512xf32) <- (-1x12x12x512xf32, 3xi64) + reshape_138 = paddle._C_ops.reshape(reshape_137, full_int_array_39) + del reshape_137 + + # pd_op.full: (1x24x24x1xf32) <- () + full_36 = paddle._C_ops.full( + [1, 24, 24, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__44 = paddle._C_ops.set_value_( + full_36, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_36 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__45 = paddle._C_ops.set_value_( + set_value__44, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__44 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__46 = paddle._C_ops.set_value_( + set_value__45, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__45 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__47 = paddle._C_ops.set_value_( + set_value__46, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__46 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__48 = paddle._C_ops.set_value_( + set_value__47, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__47 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__49 = paddle._C_ops.set_value_( + set_value__48, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__48 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__50 = paddle._C_ops.set_value_( + set_value__49, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__49 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__51 = paddle._C_ops.set_value_( + set_value__50, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__50 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__4 = paddle._C_ops.set_value_( + set_value__51, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__51 + + # pd_op.reshape: (1x2x12x2x12x1xf32) <- (1x24x24x1xf32, 6xi64) + reshape_139 = paddle._C_ops.reshape(set_value__4, full_int_array_42) + + # pd_op.transpose: (1x2x2x12x12x1xf32) <- (1x2x12x2x12x1xf32) + transpose_60 = paddle._C_ops.transpose(reshape_139, [0, 1, 3, 2, 4, 5]) + del reshape_139 + + # pd_op.reshape: (4x12x12x1xf32) <- (1x2x2x12x12x1xf32, 4xi64) + reshape_140 = paddle._C_ops.reshape(transpose_60, full_int_array_27) + del transpose_60 + + # pd_op.reshape: (4x144xf32) <- (4x12x12x1xf32, 2xi64) + reshape_141 = paddle._C_ops.reshape(reshape_140, full_int_array_28) + del reshape_140 + + # pd_op.unsqueeze: (4x1x144xf32) <- (4x144xf32, 1xi64) + unsqueeze_25 = paddle._C_ops.unsqueeze(reshape_141, full_int_array_1) + + # pd_op.unsqueeze: (4x144x1xf32) <- (4x144xf32, 1xi64) + unsqueeze_26 = paddle._C_ops.unsqueeze(reshape_141, full_int_array_5) + del reshape_141 + + # pd_op.subtract: (4x144x144xf32) <- (4x1x144xf32, 4x144x1xf32) + subtract_4 = paddle._C_ops.subtract(unsqueeze_25, unsqueeze_26) + del unsqueeze_25, unsqueeze_26 + + # pd_op.not_equal: (4x144x144xb) <- (4x144x144xf32, xf32) + not_equal_4 = paddle._C_ops.not_equal(subtract_4, full_11) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_8 = paddle._C_ops.where(not_equal_4, full_32, subtract_4) + del not_equal_4, subtract_4 + + # pd_op.equal: (4x144x144xb) <- (4x144x144xf32, xf32) + equal_4 = paddle._C_ops.equal(where_8, full_11) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_9 = paddle._C_ops.where(equal_4, full_33, where_8) + del equal_4, where_8 + + # pd_op.shape64: (3xi64) <- (-1x144x512xf32) + shape64_42 = paddle._C_ops.shape64(reshape_138) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_69 = paddle._C_ops.slice( + shape64_42, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_42 + + # pd_op.matmul: (-1x144x1536xf32) <- (-1x144x512xf32, 512x1536xf32) + matmul_56 = paddle._C_ops.matmul(reshape_138, parameter_184, False, False) + del parameter_184, reshape_138 + + # pd_op.add: (-1x144x1536xf32) <- (-1x144x1536xf32, 1536xf32) + add_69 = paddle._C_ops.add(matmul_56, parameter_183) + del matmul_56, parameter_183 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_63 = [slice_69, full_4, full_5, full_26, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_61 = paddle._C_ops.stack(combine_63, 0) + del combine_63 + + # pd_op.reshape: (-1x144x3x16x32xf32) <- (-1x144x1536xf32, 5xi64) + reshape_142 = paddle._C_ops.reshape(add_69, stack_61) + del add_69, stack_61 + + # pd_op.transpose: (3x-1x16x144x32xf32) <- (-1x144x3x16x32xf32) + transpose_61 = paddle._C_ops.transpose(reshape_142, [2, 0, 3, 1, 4]) + del reshape_142 + + # pd_op.slice: (-1x16x144x32xf32) <- (3x-1x16x144x32xf32, 1xi64, 1xi64) + slice_70 = paddle._C_ops.slice( + transpose_61, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x16x144x32xf32) <- (3x-1x16x144x32xf32, 1xi64, 1xi64) + slice_71 = paddle._C_ops.slice( + transpose_61, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x16x144x32xf32) <- (3x-1x16x144x32xf32, 1xi64, 1xi64) + slice_72 = paddle._C_ops.slice( + transpose_61, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_61 + + # pd_op.scale: (-1x16x144x32xf32) <- (-1x16x144x32xf32, 1xf32) + scale_9 = paddle._C_ops.scale(slice_70, full_8, float("0"), True) + del slice_70 + + # pd_op.transpose: (-1x16x32x144xf32) <- (-1x16x144x32xf32) + transpose_62 = paddle._C_ops.transpose(slice_71, [0, 1, 3, 2]) + del slice_71 + + # pd_op.matmul: (-1x16x144x144xf32) <- (-1x16x144x32xf32, -1x16x32x144xf32) + matmul_57 = paddle._C_ops.matmul(scale_9, transpose_62, False, False) + del scale_9, transpose_62 + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_143 = paddle._C_ops.reshape(data_19, full_int_array_7) + del data_19 + + # pd_op.index_select: (20736x16xf32) <- (529x16xf32, 20736xi64) + index_select_9 = paddle._C_ops.index_select(data_20, reshape_143, 0) + del data_20, reshape_143 + + # pd_op.reshape: (144x144x16xf32) <- (20736x16xf32, 3xi64) + reshape_144 = paddle._C_ops.reshape(index_select_9, full_int_array_8) + del index_select_9 + + # pd_op.transpose: (16x144x144xf32) <- (144x144x16xf32) + transpose_63 = paddle._C_ops.transpose(reshape_144, [2, 0, 1]) + del reshape_144 + + # pd_op.unsqueeze: (1x16x144x144xf32) <- (16x144x144xf32, 1xi64) + unsqueeze_27 = paddle._C_ops.unsqueeze(transpose_63, full_int_array_0) + del transpose_63 + + # pd_op.add: (-1x16x144x144xf32) <- (-1x16x144x144xf32, 1x16x144x144xf32) + add_70 = paddle._C_ops.add(matmul_57, unsqueeze_27) + del matmul_57, unsqueeze_27 + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_4 = paddle._C_ops.floor_divide(slice_69, full_34) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_64 = [floor_divide_4, full_6, full_26, full_4, full_4] + del floor_divide_4 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_62 = paddle._C_ops.stack(combine_64, 0) + del combine_64 + + # pd_op.reshape: (-1x4x16x144x144xf32) <- (-1x16x144x144xf32, 5xi64) + reshape_145 = paddle._C_ops.reshape(add_70, stack_62) + del add_70, stack_62 + + # pd_op.unsqueeze: (4x1x144x144xf32) <- (4x144x144xf32, 1xi64) + unsqueeze_28 = paddle._C_ops.unsqueeze(where_9, full_int_array_1) + del where_9 + + # pd_op.unsqueeze: (1x4x1x144x144xf32) <- (4x1x144x144xf32, 1xi64) + unsqueeze_29 = paddle._C_ops.unsqueeze(unsqueeze_28, full_int_array_0) + del unsqueeze_28 + + # pd_op.add: (-1x4x16x144x144xf32) <- (-1x4x16x144x144xf32, 1x4x1x144x144xf32) + add_71 = paddle._C_ops.add(reshape_145, unsqueeze_29) + del reshape_145, unsqueeze_29 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_65 = [slice_69, full_26, full_4, full_4] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_63 = paddle._C_ops.stack(combine_65, 0) + del combine_65 + + # pd_op.reshape: (-1x16x144x144xf32) <- (-1x4x16x144x144xf32, 4xi64) + reshape_146 = paddle._C_ops.reshape(add_71, stack_63) + del add_71, stack_63 + + # pd_op.softmax: (-1x16x144x144xf32) <- (-1x16x144x144xf32) + softmax_9 = paddle._C_ops.softmax(reshape_146, -1) + del reshape_146 + + # pd_op.matmul: (-1x16x144x32xf32) <- (-1x16x144x144xf32, -1x16x144x32xf32) + matmul_58 = paddle._C_ops.matmul(softmax_9, slice_72, False, False) + del slice_72, softmax_9 + + # pd_op.transpose: (-1x144x16x32xf32) <- (-1x16x144x32xf32) + transpose_64 = paddle._C_ops.transpose(matmul_58, [0, 2, 1, 3]) + del matmul_58 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_66 = [slice_69, full_4, full_18] + del slice_69 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_64 = paddle._C_ops.stack(combine_66, 0) + del combine_66 + + # pd_op.reshape: (-1x144x512xf32) <- (-1x144x16x32xf32, 3xi64) + reshape_147 = paddle._C_ops.reshape(transpose_64, stack_64) + del stack_64, transpose_64 + + # pd_op.matmul: (-1x144x512xf32) <- (-1x144x512xf32, 512x512xf32) + matmul_59 = paddle._C_ops.matmul(reshape_147, parameter_182, False, False) + del parameter_182, reshape_147 + + # pd_op.add: (-1x144x512xf32) <- (-1x144x512xf32, 512xf32) + add_72 = paddle._C_ops.add(matmul_59, parameter_181) + del matmul_59, parameter_181 + + # pd_op.reshape: (-1x12x12x512xf32) <- (-1x144x512xf32, 4xi64) + reshape_148 = paddle._C_ops.reshape(add_72, full_int_array_38) + del add_72 + + # pd_op.reshape: (-1x2x2x12x12x512xf32) <- (-1x12x12x512xf32, 6xi64) + reshape_149 = paddle._C_ops.reshape(reshape_148, full_int_array_40) + del reshape_148 + + # pd_op.transpose: (-1x2x12x2x12x512xf32) <- (-1x2x2x12x12x512xf32) + transpose_65 = paddle._C_ops.transpose(reshape_149, [0, 1, 3, 2, 4, 5]) + del reshape_149 + + # pd_op.reshape: (-1x24x24x512xf32) <- (-1x2x12x2x12x512xf32, 4xi64) + reshape_150 = paddle._C_ops.reshape(transpose_65, full_int_array_41) + del transpose_65 + + # pd_op.roll: (-1x24x24x512xf32) <- (-1x24x24x512xf32, 2xi64) + roll_9 = paddle._C_ops.roll(reshape_150, full_int_array_29, [1, 2]) + del reshape_150 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_67 = [slice_66, full_30, full_18] + del slice_66 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_65 = paddle._C_ops.stack(combine_67, 0) + del combine_67 + + # pd_op.reshape: (-1x576x512xf32) <- (-1x24x24x512xf32, 3xi64) + reshape_151 = paddle._C_ops.reshape(roll_9, stack_65) + del roll_9, stack_65 + + # pd_op.add: (-1x576x512xf32) <- (-1x576x512xf32, -1x576x512xf32) + add_73 = paddle._C_ops.add(add_68, reshape_151) + del add_68, reshape_151 + + # pd_op.layer_norm: (-1x576x512xf32, -1x576xf32, -1x576xf32) <- (-1x576x512xf32, 512xf32, 512xf32) + layer_norm_66, layer_norm_67, layer_norm_68 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_73, parameter_180, parameter_179, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_179, parameter_180 + + # pd_op.matmul: (-1x576x2048xf32) <- (-1x576x512xf32, 512x2048xf32) + matmul_60 = paddle._C_ops.matmul(layer_norm_66, parameter_178, False, False) + del layer_norm_66, parameter_178 + + # pd_op.add: (-1x576x2048xf32) <- (-1x576x2048xf32, 2048xf32) + add_74 = paddle._C_ops.add(matmul_60, parameter_177) + del matmul_60, parameter_177 + + # pd_op.gelu: (-1x576x2048xf32) <- (-1x576x2048xf32) + gelu_9 = paddle._C_ops.gelu(add_74, False) + del add_74 + + # pd_op.matmul: (-1x576x512xf32) <- (-1x576x2048xf32, 2048x512xf32) + matmul_61 = paddle._C_ops.matmul(gelu_9, parameter_176, False, False) + del gelu_9, parameter_176 + + # pd_op.add: (-1x576x512xf32) <- (-1x576x512xf32, 512xf32) + add_75 = paddle._C_ops.add(matmul_61, parameter_175) + del matmul_61, parameter_175 + + # pd_op.add: (-1x576x512xf32) <- (-1x576x512xf32, -1x576x512xf32) + add_76 = paddle._C_ops.add(add_73, add_75) + del add_73, add_75 + + # pd_op.shape64: (3xi64) <- (-1x576x512xf32) + shape64_43 = paddle._C_ops.shape64(add_76) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_73 = paddle._C_ops.slice( + shape64_43, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_43 + + # pd_op.layer_norm: (-1x576x512xf32, -1x576xf32, -1x576xf32) <- (-1x576x512xf32, 512xf32, 512xf32) + layer_norm_69, layer_norm_70, layer_norm_71 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_76, parameter_174, parameter_173, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_173, parameter_174 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_68 = [slice_73, full_28, full_28, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_66 = paddle._C_ops.stack(combine_68, 0) + del combine_68 + + # pd_op.reshape: (-1x24x24x512xf32) <- (-1x576x512xf32, 4xi64) + reshape_152 = paddle._C_ops.reshape(layer_norm_69, stack_66) + del layer_norm_69, stack_66 + + # pd_op.shape64: (4xi64) <- (-1x24x24x512xf32) + shape64_44 = paddle._C_ops.shape64(reshape_152) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_74 = paddle._C_ops.slice( + shape64_44, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_44 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_69 = [slice_74, full_29, full_3, full_29, full_3, full_18] + del slice_74 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_67 = paddle._C_ops.stack(combine_69, 0) + del combine_69 + + # pd_op.reshape: (-1x2x12x2x12x512xf32) <- (-1x24x24x512xf32, 6xi64) + reshape_153 = paddle._C_ops.reshape(reshape_152, stack_67) + del reshape_152, stack_67 + + # pd_op.transpose: (-1x2x2x12x12x512xf32) <- (-1x2x12x2x12x512xf32) + transpose_66 = paddle._C_ops.transpose(reshape_153, [0, 1, 3, 2, 4, 5]) + del reshape_153 + + # pd_op.reshape: (-1x12x12x512xf32) <- (-1x2x2x12x12x512xf32, 4xi64) + reshape_154 = paddle._C_ops.reshape(transpose_66, full_int_array_38) + del transpose_66 + + # pd_op.reshape: (-1x144x512xf32) <- (-1x12x12x512xf32, 3xi64) + reshape_155 = paddle._C_ops.reshape(reshape_154, full_int_array_39) + del reshape_154 + + # pd_op.shape64: (3xi64) <- (-1x144x512xf32) + shape64_45 = paddle._C_ops.shape64(reshape_155) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_75 = paddle._C_ops.slice( + shape64_45, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_45 + + # pd_op.matmul: (-1x144x1536xf32) <- (-1x144x512xf32, 512x1536xf32) + matmul_62 = paddle._C_ops.matmul(reshape_155, parameter_172, False, False) + del parameter_172, reshape_155 + + # pd_op.add: (-1x144x1536xf32) <- (-1x144x1536xf32, 1536xf32) + add_77 = paddle._C_ops.add(matmul_62, parameter_171) + del matmul_62, parameter_171 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_70 = [slice_75, full_4, full_5, full_26, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_68 = paddle._C_ops.stack(combine_70, 0) + del combine_70 + + # pd_op.reshape: (-1x144x3x16x32xf32) <- (-1x144x1536xf32, 5xi64) + reshape_156 = paddle._C_ops.reshape(add_77, stack_68) + del add_77, stack_68 + + # pd_op.transpose: (3x-1x16x144x32xf32) <- (-1x144x3x16x32xf32) + transpose_67 = paddle._C_ops.transpose(reshape_156, [2, 0, 3, 1, 4]) + del reshape_156 + + # pd_op.slice: (-1x16x144x32xf32) <- (3x-1x16x144x32xf32, 1xi64, 1xi64) + slice_76 = paddle._C_ops.slice( + transpose_67, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x16x144x32xf32) <- (3x-1x16x144x32xf32, 1xi64, 1xi64) + slice_77 = paddle._C_ops.slice( + transpose_67, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x16x144x32xf32) <- (3x-1x16x144x32xf32, 1xi64, 1xi64) + slice_78 = paddle._C_ops.slice( + transpose_67, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_67 + + # pd_op.scale: (-1x16x144x32xf32) <- (-1x16x144x32xf32, 1xf32) + scale_10 = paddle._C_ops.scale(slice_76, full_8, float("0"), True) + del slice_76 + + # pd_op.transpose: (-1x16x32x144xf32) <- (-1x16x144x32xf32) + transpose_68 = paddle._C_ops.transpose(slice_77, [0, 1, 3, 2]) + del slice_77 + + # pd_op.matmul: (-1x16x144x144xf32) <- (-1x16x144x32xf32, -1x16x32x144xf32) + matmul_63 = paddle._C_ops.matmul(scale_10, transpose_68, False, False) + del scale_10, transpose_68 + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_157 = paddle._C_ops.reshape(data_21, full_int_array_7) + del data_21 + + # pd_op.index_select: (20736x16xf32) <- (529x16xf32, 20736xi64) + index_select_10 = paddle._C_ops.index_select(data_22, reshape_157, 0) + del data_22, reshape_157 + + # pd_op.reshape: (144x144x16xf32) <- (20736x16xf32, 3xi64) + reshape_158 = paddle._C_ops.reshape(index_select_10, full_int_array_8) + del index_select_10 + + # pd_op.transpose: (16x144x144xf32) <- (144x144x16xf32) + transpose_69 = paddle._C_ops.transpose(reshape_158, [2, 0, 1]) + del reshape_158 + + # pd_op.unsqueeze: (1x16x144x144xf32) <- (16x144x144xf32, 1xi64) + unsqueeze_30 = paddle._C_ops.unsqueeze(transpose_69, full_int_array_0) + del transpose_69 + + # pd_op.add: (-1x16x144x144xf32) <- (-1x16x144x144xf32, 1x16x144x144xf32) + add_78 = paddle._C_ops.add(matmul_63, unsqueeze_30) + del matmul_63, unsqueeze_30 + + # pd_op.softmax: (-1x16x144x144xf32) <- (-1x16x144x144xf32) + softmax_10 = paddle._C_ops.softmax(add_78, -1) + del add_78 + + # pd_op.matmul: (-1x16x144x32xf32) <- (-1x16x144x144xf32, -1x16x144x32xf32) + matmul_64 = paddle._C_ops.matmul(softmax_10, slice_78, False, False) + del slice_78, softmax_10 + + # pd_op.transpose: (-1x144x16x32xf32) <- (-1x16x144x32xf32) + transpose_70 = paddle._C_ops.transpose(matmul_64, [0, 2, 1, 3]) + del matmul_64 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_71 = [slice_75, full_4, full_18] + del slice_75 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_69 = paddle._C_ops.stack(combine_71, 0) + del combine_71 + + # pd_op.reshape: (-1x144x512xf32) <- (-1x144x16x32xf32, 3xi64) + reshape_159 = paddle._C_ops.reshape(transpose_70, stack_69) + del stack_69, transpose_70 + + # pd_op.matmul: (-1x144x512xf32) <- (-1x144x512xf32, 512x512xf32) + matmul_65 = paddle._C_ops.matmul(reshape_159, parameter_170, False, False) + del parameter_170, reshape_159 + + # pd_op.add: (-1x144x512xf32) <- (-1x144x512xf32, 512xf32) + add_79 = paddle._C_ops.add(matmul_65, parameter_169) + del matmul_65, parameter_169 + + # pd_op.reshape: (-1x12x12x512xf32) <- (-1x144x512xf32, 4xi64) + reshape_160 = paddle._C_ops.reshape(add_79, full_int_array_38) + del add_79 + + # pd_op.reshape: (-1x2x2x12x12x512xf32) <- (-1x12x12x512xf32, 6xi64) + reshape_161 = paddle._C_ops.reshape(reshape_160, full_int_array_40) + del reshape_160 + + # pd_op.transpose: (-1x2x12x2x12x512xf32) <- (-1x2x2x12x12x512xf32) + transpose_71 = paddle._C_ops.transpose(reshape_161, [0, 1, 3, 2, 4, 5]) + del reshape_161 + + # pd_op.reshape: (-1x24x24x512xf32) <- (-1x2x12x2x12x512xf32, 4xi64) + reshape_162 = paddle._C_ops.reshape(transpose_71, full_int_array_41) + del transpose_71 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_72 = [slice_73, full_30, full_18] + del slice_73 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_70 = paddle._C_ops.stack(combine_72, 0) + del combine_72 + + # pd_op.reshape: (-1x576x512xf32) <- (-1x24x24x512xf32, 3xi64) + reshape_163 = paddle._C_ops.reshape(reshape_162, stack_70) + del reshape_162, stack_70 + + # pd_op.add: (-1x576x512xf32) <- (-1x576x512xf32, -1x576x512xf32) + add_80 = paddle._C_ops.add(add_76, reshape_163) + del add_76, reshape_163 + + # pd_op.layer_norm: (-1x576x512xf32, -1x576xf32, -1x576xf32) <- (-1x576x512xf32, 512xf32, 512xf32) + layer_norm_72, layer_norm_73, layer_norm_74 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_80, parameter_168, parameter_167, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_167, parameter_168 + + # pd_op.matmul: (-1x576x2048xf32) <- (-1x576x512xf32, 512x2048xf32) + matmul_66 = paddle._C_ops.matmul(layer_norm_72, parameter_166, False, False) + del layer_norm_72, parameter_166 + + # pd_op.add: (-1x576x2048xf32) <- (-1x576x2048xf32, 2048xf32) + add_81 = paddle._C_ops.add(matmul_66, parameter_165) + del matmul_66, parameter_165 + + # pd_op.gelu: (-1x576x2048xf32) <- (-1x576x2048xf32) + gelu_10 = paddle._C_ops.gelu(add_81, False) + del add_81 + + # pd_op.matmul: (-1x576x512xf32) <- (-1x576x2048xf32, 2048x512xf32) + matmul_67 = paddle._C_ops.matmul(gelu_10, parameter_164, False, False) + del gelu_10, parameter_164 + + # pd_op.add: (-1x576x512xf32) <- (-1x576x512xf32, 512xf32) + add_82 = paddle._C_ops.add(matmul_67, parameter_163) + del matmul_67, parameter_163 + + # pd_op.add: (-1x576x512xf32) <- (-1x576x512xf32, -1x576x512xf32) + add_83 = paddle._C_ops.add(add_80, add_82) + del add_80, add_82 + + # pd_op.shape64: (3xi64) <- (-1x576x512xf32) + shape64_46 = paddle._C_ops.shape64(add_83) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_79 = paddle._C_ops.slice( + shape64_46, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_46 + + # pd_op.layer_norm: (-1x576x512xf32, -1x576xf32, -1x576xf32) <- (-1x576x512xf32, 512xf32, 512xf32) + layer_norm_75, layer_norm_76, layer_norm_77 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_83, parameter_162, parameter_161, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_161, parameter_162 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_73 = [slice_79, full_28, full_28, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_71 = paddle._C_ops.stack(combine_73, 0) + del combine_73 + + # pd_op.reshape: (-1x24x24x512xf32) <- (-1x576x512xf32, 4xi64) + reshape_164 = paddle._C_ops.reshape(layer_norm_75, stack_71) + del layer_norm_75, stack_71 + + # pd_op.shape64: (4xi64) <- (-1x24x24x512xf32) + shape64_47 = paddle._C_ops.shape64(reshape_164) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_80 = paddle._C_ops.slice( + shape64_47, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_47 + + # pd_op.roll: (-1x24x24x512xf32) <- (-1x24x24x512xf32, 2xi64) + roll_10 = paddle._C_ops.roll(reshape_164, full_int_array_11, [1, 2]) + del reshape_164 + + # pd_op.shape64: (4xi64) <- (-1x24x24x512xf32) + shape64_48 = paddle._C_ops.shape64(roll_10) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_81 = paddle._C_ops.slice( + shape64_48, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_48 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_74 = [slice_81, full_29, full_3, full_29, full_3, full_18] + del slice_81 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_72 = paddle._C_ops.stack(combine_74, 0) + del combine_74 + + # pd_op.reshape: (-1x2x12x2x12x512xf32) <- (-1x24x24x512xf32, 6xi64) + reshape_165 = paddle._C_ops.reshape(roll_10, stack_72) + del roll_10, stack_72 + + # pd_op.transpose: (-1x2x2x12x12x512xf32) <- (-1x2x12x2x12x512xf32) + transpose_72 = paddle._C_ops.transpose(reshape_165, [0, 1, 3, 2, 4, 5]) + del reshape_165 + + # pd_op.reshape: (-1x12x12x512xf32) <- (-1x2x2x12x12x512xf32, 4xi64) + reshape_166 = paddle._C_ops.reshape(transpose_72, full_int_array_38) + del transpose_72 + + # pd_op.reshape: (-1x144x512xf32) <- (-1x12x12x512xf32, 3xi64) + reshape_167 = paddle._C_ops.reshape(reshape_166, full_int_array_39) + del reshape_166 + + # pd_op.full: (1x24x24x1xf32) <- () + full_37 = paddle._C_ops.full( + [1, 24, 24, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__52 = paddle._C_ops.set_value_( + full_37, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_37 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__53 = paddle._C_ops.set_value_( + set_value__52, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__52 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__54 = paddle._C_ops.set_value_( + set_value__53, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__53 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__55 = paddle._C_ops.set_value_( + set_value__54, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__54 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__56 = paddle._C_ops.set_value_( + set_value__55, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__55 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__57 = paddle._C_ops.set_value_( + set_value__56, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__56 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__58 = paddle._C_ops.set_value_( + set_value__57, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__57 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__59 = paddle._C_ops.set_value_( + set_value__58, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__58 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__5 = paddle._C_ops.set_value_( + set_value__59, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__59 + + # pd_op.reshape: (1x2x12x2x12x1xf32) <- (1x24x24x1xf32, 6xi64) + reshape_168 = paddle._C_ops.reshape(set_value__5, full_int_array_42) + + # pd_op.transpose: (1x2x2x12x12x1xf32) <- (1x2x12x2x12x1xf32) + transpose_73 = paddle._C_ops.transpose(reshape_168, [0, 1, 3, 2, 4, 5]) + del reshape_168 + + # pd_op.reshape: (4x12x12x1xf32) <- (1x2x2x12x12x1xf32, 4xi64) + reshape_169 = paddle._C_ops.reshape(transpose_73, full_int_array_27) + del transpose_73 + + # pd_op.reshape: (4x144xf32) <- (4x12x12x1xf32, 2xi64) + reshape_170 = paddle._C_ops.reshape(reshape_169, full_int_array_28) + del reshape_169 + + # pd_op.unsqueeze: (4x1x144xf32) <- (4x144xf32, 1xi64) + unsqueeze_31 = paddle._C_ops.unsqueeze(reshape_170, full_int_array_1) + + # pd_op.unsqueeze: (4x144x1xf32) <- (4x144xf32, 1xi64) + unsqueeze_32 = paddle._C_ops.unsqueeze(reshape_170, full_int_array_5) + del reshape_170 + + # pd_op.subtract: (4x144x144xf32) <- (4x1x144xf32, 4x144x1xf32) + subtract_5 = paddle._C_ops.subtract(unsqueeze_31, unsqueeze_32) + del unsqueeze_31, unsqueeze_32 + + # pd_op.not_equal: (4x144x144xb) <- (4x144x144xf32, xf32) + not_equal_5 = paddle._C_ops.not_equal(subtract_5, full_11) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_10 = paddle._C_ops.where(not_equal_5, full_32, subtract_5) + del not_equal_5, subtract_5 + + # pd_op.equal: (4x144x144xb) <- (4x144x144xf32, xf32) + equal_5 = paddle._C_ops.equal(where_10, full_11) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_11 = paddle._C_ops.where(equal_5, full_33, where_10) + del equal_5, where_10 + + # pd_op.shape64: (3xi64) <- (-1x144x512xf32) + shape64_49 = paddle._C_ops.shape64(reshape_167) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_82 = paddle._C_ops.slice( + shape64_49, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_49 + + # pd_op.matmul: (-1x144x1536xf32) <- (-1x144x512xf32, 512x1536xf32) + matmul_68 = paddle._C_ops.matmul(reshape_167, parameter_160, False, False) + del parameter_160, reshape_167 + + # pd_op.add: (-1x144x1536xf32) <- (-1x144x1536xf32, 1536xf32) + add_84 = paddle._C_ops.add(matmul_68, parameter_159) + del matmul_68, parameter_159 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_75 = [slice_82, full_4, full_5, full_26, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_73 = paddle._C_ops.stack(combine_75, 0) + del combine_75 + + # pd_op.reshape: (-1x144x3x16x32xf32) <- (-1x144x1536xf32, 5xi64) + reshape_171 = paddle._C_ops.reshape(add_84, stack_73) + del add_84, stack_73 + + # pd_op.transpose: (3x-1x16x144x32xf32) <- (-1x144x3x16x32xf32) + transpose_74 = paddle._C_ops.transpose(reshape_171, [2, 0, 3, 1, 4]) + del reshape_171 + + # pd_op.slice: (-1x16x144x32xf32) <- (3x-1x16x144x32xf32, 1xi64, 1xi64) + slice_83 = paddle._C_ops.slice( + transpose_74, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x16x144x32xf32) <- (3x-1x16x144x32xf32, 1xi64, 1xi64) + slice_84 = paddle._C_ops.slice( + transpose_74, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x16x144x32xf32) <- (3x-1x16x144x32xf32, 1xi64, 1xi64) + slice_85 = paddle._C_ops.slice( + transpose_74, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_74 + + # pd_op.scale: (-1x16x144x32xf32) <- (-1x16x144x32xf32, 1xf32) + scale_11 = paddle._C_ops.scale(slice_83, full_8, float("0"), True) + del slice_83 + + # pd_op.transpose: (-1x16x32x144xf32) <- (-1x16x144x32xf32) + transpose_75 = paddle._C_ops.transpose(slice_84, [0, 1, 3, 2]) + del slice_84 + + # pd_op.matmul: (-1x16x144x144xf32) <- (-1x16x144x32xf32, -1x16x32x144xf32) + matmul_69 = paddle._C_ops.matmul(scale_11, transpose_75, False, False) + del scale_11, transpose_75 + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_172 = paddle._C_ops.reshape(data_23, full_int_array_7) + del data_23 + + # pd_op.index_select: (20736x16xf32) <- (529x16xf32, 20736xi64) + index_select_11 = paddle._C_ops.index_select(data_24, reshape_172, 0) + del data_24, reshape_172 + + # pd_op.reshape: (144x144x16xf32) <- (20736x16xf32, 3xi64) + reshape_173 = paddle._C_ops.reshape(index_select_11, full_int_array_8) + del index_select_11 + + # pd_op.transpose: (16x144x144xf32) <- (144x144x16xf32) + transpose_76 = paddle._C_ops.transpose(reshape_173, [2, 0, 1]) + del reshape_173 + + # pd_op.unsqueeze: (1x16x144x144xf32) <- (16x144x144xf32, 1xi64) + unsqueeze_33 = paddle._C_ops.unsqueeze(transpose_76, full_int_array_0) + del transpose_76 + + # pd_op.add: (-1x16x144x144xf32) <- (-1x16x144x144xf32, 1x16x144x144xf32) + add_85 = paddle._C_ops.add(matmul_69, unsqueeze_33) + del matmul_69, unsqueeze_33 + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_5 = paddle._C_ops.floor_divide(slice_82, full_34) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_76 = [floor_divide_5, full_6, full_26, full_4, full_4] + del floor_divide_5 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_74 = paddle._C_ops.stack(combine_76, 0) + del combine_76 + + # pd_op.reshape: (-1x4x16x144x144xf32) <- (-1x16x144x144xf32, 5xi64) + reshape_174 = paddle._C_ops.reshape(add_85, stack_74) + del add_85, stack_74 + + # pd_op.unsqueeze: (4x1x144x144xf32) <- (4x144x144xf32, 1xi64) + unsqueeze_34 = paddle._C_ops.unsqueeze(where_11, full_int_array_1) + del where_11 + + # pd_op.unsqueeze: (1x4x1x144x144xf32) <- (4x1x144x144xf32, 1xi64) + unsqueeze_35 = paddle._C_ops.unsqueeze(unsqueeze_34, full_int_array_0) + del unsqueeze_34 + + # pd_op.add: (-1x4x16x144x144xf32) <- (-1x4x16x144x144xf32, 1x4x1x144x144xf32) + add_86 = paddle._C_ops.add(reshape_174, unsqueeze_35) + del reshape_174, unsqueeze_35 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_77 = [slice_82, full_26, full_4, full_4] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_75 = paddle._C_ops.stack(combine_77, 0) + del combine_77 + + # pd_op.reshape: (-1x16x144x144xf32) <- (-1x4x16x144x144xf32, 4xi64) + reshape_175 = paddle._C_ops.reshape(add_86, stack_75) + del add_86, stack_75 + + # pd_op.softmax: (-1x16x144x144xf32) <- (-1x16x144x144xf32) + softmax_11 = paddle._C_ops.softmax(reshape_175, -1) + del reshape_175 + + # pd_op.matmul: (-1x16x144x32xf32) <- (-1x16x144x144xf32, -1x16x144x32xf32) + matmul_70 = paddle._C_ops.matmul(softmax_11, slice_85, False, False) + del slice_85, softmax_11 + + # pd_op.transpose: (-1x144x16x32xf32) <- (-1x16x144x32xf32) + transpose_77 = paddle._C_ops.transpose(matmul_70, [0, 2, 1, 3]) + del matmul_70 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_78 = [slice_82, full_4, full_18] + del slice_82 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_76 = paddle._C_ops.stack(combine_78, 0) + del combine_78 + + # pd_op.reshape: (-1x144x512xf32) <- (-1x144x16x32xf32, 3xi64) + reshape_176 = paddle._C_ops.reshape(transpose_77, stack_76) + del stack_76, transpose_77 + + # pd_op.matmul: (-1x144x512xf32) <- (-1x144x512xf32, 512x512xf32) + matmul_71 = paddle._C_ops.matmul(reshape_176, parameter_158, False, False) + del parameter_158, reshape_176 + + # pd_op.add: (-1x144x512xf32) <- (-1x144x512xf32, 512xf32) + add_87 = paddle._C_ops.add(matmul_71, parameter_157) + del matmul_71, parameter_157 + + # pd_op.reshape: (-1x12x12x512xf32) <- (-1x144x512xf32, 4xi64) + reshape_177 = paddle._C_ops.reshape(add_87, full_int_array_38) + del add_87 + + # pd_op.reshape: (-1x2x2x12x12x512xf32) <- (-1x12x12x512xf32, 6xi64) + reshape_178 = paddle._C_ops.reshape(reshape_177, full_int_array_40) + del reshape_177 + + # pd_op.transpose: (-1x2x12x2x12x512xf32) <- (-1x2x2x12x12x512xf32) + transpose_78 = paddle._C_ops.transpose(reshape_178, [0, 1, 3, 2, 4, 5]) + del reshape_178 + + # pd_op.reshape: (-1x24x24x512xf32) <- (-1x2x12x2x12x512xf32, 4xi64) + reshape_179 = paddle._C_ops.reshape(transpose_78, full_int_array_41) + del transpose_78 + + # pd_op.roll: (-1x24x24x512xf32) <- (-1x24x24x512xf32, 2xi64) + roll_11 = paddle._C_ops.roll(reshape_179, full_int_array_29, [1, 2]) + del reshape_179 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_79 = [slice_79, full_30, full_18] + del slice_79 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_77 = paddle._C_ops.stack(combine_79, 0) + del combine_79 + + # pd_op.reshape: (-1x576x512xf32) <- (-1x24x24x512xf32, 3xi64) + reshape_180 = paddle._C_ops.reshape(roll_11, stack_77) + del roll_11, stack_77 + + # pd_op.add: (-1x576x512xf32) <- (-1x576x512xf32, -1x576x512xf32) + add_88 = paddle._C_ops.add(add_83, reshape_180) + del add_83, reshape_180 + + # pd_op.layer_norm: (-1x576x512xf32, -1x576xf32, -1x576xf32) <- (-1x576x512xf32, 512xf32, 512xf32) + layer_norm_78, layer_norm_79, layer_norm_80 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_88, parameter_156, parameter_155, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_155, parameter_156 + + # pd_op.matmul: (-1x576x2048xf32) <- (-1x576x512xf32, 512x2048xf32) + matmul_72 = paddle._C_ops.matmul(layer_norm_78, parameter_154, False, False) + del layer_norm_78, parameter_154 + + # pd_op.add: (-1x576x2048xf32) <- (-1x576x2048xf32, 2048xf32) + add_89 = paddle._C_ops.add(matmul_72, parameter_153) + del matmul_72, parameter_153 + + # pd_op.gelu: (-1x576x2048xf32) <- (-1x576x2048xf32) + gelu_11 = paddle._C_ops.gelu(add_89, False) + del add_89 + + # pd_op.matmul: (-1x576x512xf32) <- (-1x576x2048xf32, 2048x512xf32) + matmul_73 = paddle._C_ops.matmul(gelu_11, parameter_152, False, False) + del gelu_11, parameter_152 + + # pd_op.add: (-1x576x512xf32) <- (-1x576x512xf32, 512xf32) + add_90 = paddle._C_ops.add(matmul_73, parameter_151) + del matmul_73, parameter_151 + + # pd_op.add: (-1x576x512xf32) <- (-1x576x512xf32, -1x576x512xf32) + add_91 = paddle._C_ops.add(add_88, add_90) + del add_88, add_90 + + # pd_op.shape64: (3xi64) <- (-1x576x512xf32) + shape64_50 = paddle._C_ops.shape64(add_91) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_86 = paddle._C_ops.slice( + shape64_50, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_50 + + # pd_op.layer_norm: (-1x576x512xf32, -1x576xf32, -1x576xf32) <- (-1x576x512xf32, 512xf32, 512xf32) + layer_norm_81, layer_norm_82, layer_norm_83 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_91, parameter_150, parameter_149, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_149, parameter_150 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_80 = [slice_86, full_28, full_28, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_78 = paddle._C_ops.stack(combine_80, 0) + del combine_80 + + # pd_op.reshape: (-1x24x24x512xf32) <- (-1x576x512xf32, 4xi64) + reshape_181 = paddle._C_ops.reshape(layer_norm_81, stack_78) + del layer_norm_81, stack_78 + + # pd_op.shape64: (4xi64) <- (-1x24x24x512xf32) + shape64_51 = paddle._C_ops.shape64(reshape_181) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_87 = paddle._C_ops.slice( + shape64_51, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_51 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_81 = [slice_87, full_29, full_3, full_29, full_3, full_18] + del slice_87 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_79 = paddle._C_ops.stack(combine_81, 0) + del combine_81 + + # pd_op.reshape: (-1x2x12x2x12x512xf32) <- (-1x24x24x512xf32, 6xi64) + reshape_182 = paddle._C_ops.reshape(reshape_181, stack_79) + del reshape_181, stack_79 + + # pd_op.transpose: (-1x2x2x12x12x512xf32) <- (-1x2x12x2x12x512xf32) + transpose_79 = paddle._C_ops.transpose(reshape_182, [0, 1, 3, 2, 4, 5]) + del reshape_182 + + # pd_op.reshape: (-1x12x12x512xf32) <- (-1x2x2x12x12x512xf32, 4xi64) + reshape_183 = paddle._C_ops.reshape(transpose_79, full_int_array_38) + del transpose_79 + + # pd_op.reshape: (-1x144x512xf32) <- (-1x12x12x512xf32, 3xi64) + reshape_184 = paddle._C_ops.reshape(reshape_183, full_int_array_39) + del reshape_183 + + # pd_op.shape64: (3xi64) <- (-1x144x512xf32) + shape64_52 = paddle._C_ops.shape64(reshape_184) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_88 = paddle._C_ops.slice( + shape64_52, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_52 + + # pd_op.matmul: (-1x144x1536xf32) <- (-1x144x512xf32, 512x1536xf32) + matmul_74 = paddle._C_ops.matmul(reshape_184, parameter_148, False, False) + del parameter_148, reshape_184 + + # pd_op.add: (-1x144x1536xf32) <- (-1x144x1536xf32, 1536xf32) + add_92 = paddle._C_ops.add(matmul_74, parameter_147) + del matmul_74, parameter_147 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_82 = [slice_88, full_4, full_5, full_26, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_80 = paddle._C_ops.stack(combine_82, 0) + del combine_82 + + # pd_op.reshape: (-1x144x3x16x32xf32) <- (-1x144x1536xf32, 5xi64) + reshape_185 = paddle._C_ops.reshape(add_92, stack_80) + del add_92, stack_80 + + # pd_op.transpose: (3x-1x16x144x32xf32) <- (-1x144x3x16x32xf32) + transpose_80 = paddle._C_ops.transpose(reshape_185, [2, 0, 3, 1, 4]) + del reshape_185 + + # pd_op.slice: (-1x16x144x32xf32) <- (3x-1x16x144x32xf32, 1xi64, 1xi64) + slice_89 = paddle._C_ops.slice( + transpose_80, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x16x144x32xf32) <- (3x-1x16x144x32xf32, 1xi64, 1xi64) + slice_90 = paddle._C_ops.slice( + transpose_80, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x16x144x32xf32) <- (3x-1x16x144x32xf32, 1xi64, 1xi64) + slice_91 = paddle._C_ops.slice( + transpose_80, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_80 + + # pd_op.scale: (-1x16x144x32xf32) <- (-1x16x144x32xf32, 1xf32) + scale_12 = paddle._C_ops.scale(slice_89, full_8, float("0"), True) + del slice_89 + + # pd_op.transpose: (-1x16x32x144xf32) <- (-1x16x144x32xf32) + transpose_81 = paddle._C_ops.transpose(slice_90, [0, 1, 3, 2]) + del slice_90 + + # pd_op.matmul: (-1x16x144x144xf32) <- (-1x16x144x32xf32, -1x16x32x144xf32) + matmul_75 = paddle._C_ops.matmul(scale_12, transpose_81, False, False) + del scale_12, transpose_81 + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_186 = paddle._C_ops.reshape(data_25, full_int_array_7) + del data_25 + + # pd_op.index_select: (20736x16xf32) <- (529x16xf32, 20736xi64) + index_select_12 = paddle._C_ops.index_select(data_26, reshape_186, 0) + del data_26, reshape_186 + + # pd_op.reshape: (144x144x16xf32) <- (20736x16xf32, 3xi64) + reshape_187 = paddle._C_ops.reshape(index_select_12, full_int_array_8) + del index_select_12 + + # pd_op.transpose: (16x144x144xf32) <- (144x144x16xf32) + transpose_82 = paddle._C_ops.transpose(reshape_187, [2, 0, 1]) + del reshape_187 + + # pd_op.unsqueeze: (1x16x144x144xf32) <- (16x144x144xf32, 1xi64) + unsqueeze_36 = paddle._C_ops.unsqueeze(transpose_82, full_int_array_0) + del transpose_82 + + # pd_op.add: (-1x16x144x144xf32) <- (-1x16x144x144xf32, 1x16x144x144xf32) + add_93 = paddle._C_ops.add(matmul_75, unsqueeze_36) + del matmul_75, unsqueeze_36 + + # pd_op.softmax: (-1x16x144x144xf32) <- (-1x16x144x144xf32) + softmax_12 = paddle._C_ops.softmax(add_93, -1) + del add_93 + + # pd_op.matmul: (-1x16x144x32xf32) <- (-1x16x144x144xf32, -1x16x144x32xf32) + matmul_76 = paddle._C_ops.matmul(softmax_12, slice_91, False, False) + del slice_91, softmax_12 + + # pd_op.transpose: (-1x144x16x32xf32) <- (-1x16x144x32xf32) + transpose_83 = paddle._C_ops.transpose(matmul_76, [0, 2, 1, 3]) + del matmul_76 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_83 = [slice_88, full_4, full_18] + del slice_88 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_81 = paddle._C_ops.stack(combine_83, 0) + del combine_83 + + # pd_op.reshape: (-1x144x512xf32) <- (-1x144x16x32xf32, 3xi64) + reshape_188 = paddle._C_ops.reshape(transpose_83, stack_81) + del stack_81, transpose_83 + + # pd_op.matmul: (-1x144x512xf32) <- (-1x144x512xf32, 512x512xf32) + matmul_77 = paddle._C_ops.matmul(reshape_188, parameter_146, False, False) + del parameter_146, reshape_188 + + # pd_op.add: (-1x144x512xf32) <- (-1x144x512xf32, 512xf32) + add_94 = paddle._C_ops.add(matmul_77, parameter_145) + del matmul_77, parameter_145 + + # pd_op.reshape: (-1x12x12x512xf32) <- (-1x144x512xf32, 4xi64) + reshape_189 = paddle._C_ops.reshape(add_94, full_int_array_38) + del add_94 + + # pd_op.reshape: (-1x2x2x12x12x512xf32) <- (-1x12x12x512xf32, 6xi64) + reshape_190 = paddle._C_ops.reshape(reshape_189, full_int_array_40) + del reshape_189 + + # pd_op.transpose: (-1x2x12x2x12x512xf32) <- (-1x2x2x12x12x512xf32) + transpose_84 = paddle._C_ops.transpose(reshape_190, [0, 1, 3, 2, 4, 5]) + del reshape_190 + + # pd_op.reshape: (-1x24x24x512xf32) <- (-1x2x12x2x12x512xf32, 4xi64) + reshape_191 = paddle._C_ops.reshape(transpose_84, full_int_array_41) + del transpose_84 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_84 = [slice_86, full_30, full_18] + del slice_86 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_82 = paddle._C_ops.stack(combine_84, 0) + del combine_84 + + # pd_op.reshape: (-1x576x512xf32) <- (-1x24x24x512xf32, 3xi64) + reshape_192 = paddle._C_ops.reshape(reshape_191, stack_82) + del reshape_191, stack_82 + + # pd_op.add: (-1x576x512xf32) <- (-1x576x512xf32, -1x576x512xf32) + add_95 = paddle._C_ops.add(add_91, reshape_192) + del add_91, reshape_192 + + # pd_op.layer_norm: (-1x576x512xf32, -1x576xf32, -1x576xf32) <- (-1x576x512xf32, 512xf32, 512xf32) + layer_norm_84, layer_norm_85, layer_norm_86 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_95, parameter_144, parameter_143, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_143, parameter_144 + + # pd_op.matmul: (-1x576x2048xf32) <- (-1x576x512xf32, 512x2048xf32) + matmul_78 = paddle._C_ops.matmul(layer_norm_84, parameter_142, False, False) + del layer_norm_84, parameter_142 + + # pd_op.add: (-1x576x2048xf32) <- (-1x576x2048xf32, 2048xf32) + add_96 = paddle._C_ops.add(matmul_78, parameter_141) + del matmul_78, parameter_141 + + # pd_op.gelu: (-1x576x2048xf32) <- (-1x576x2048xf32) + gelu_12 = paddle._C_ops.gelu(add_96, False) + del add_96 + + # pd_op.matmul: (-1x576x512xf32) <- (-1x576x2048xf32, 2048x512xf32) + matmul_79 = paddle._C_ops.matmul(gelu_12, parameter_140, False, False) + del gelu_12, parameter_140 + + # pd_op.add: (-1x576x512xf32) <- (-1x576x512xf32, 512xf32) + add_97 = paddle._C_ops.add(matmul_79, parameter_139) + del matmul_79, parameter_139 + + # pd_op.add: (-1x576x512xf32) <- (-1x576x512xf32, -1x576x512xf32) + add_98 = paddle._C_ops.add(add_95, add_97) + del add_95, add_97 + + # pd_op.shape64: (3xi64) <- (-1x576x512xf32) + shape64_53 = paddle._C_ops.shape64(add_98) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_92 = paddle._C_ops.slice( + shape64_53, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_53 + + # pd_op.layer_norm: (-1x576x512xf32, -1x576xf32, -1x576xf32) <- (-1x576x512xf32, 512xf32, 512xf32) + layer_norm_87, layer_norm_88, layer_norm_89 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_98, parameter_138, parameter_137, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_137, parameter_138 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_85 = [slice_92, full_28, full_28, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_83 = paddle._C_ops.stack(combine_85, 0) + del combine_85 + + # pd_op.reshape: (-1x24x24x512xf32) <- (-1x576x512xf32, 4xi64) + reshape_193 = paddle._C_ops.reshape(layer_norm_87, stack_83) + del layer_norm_87, stack_83 + + # pd_op.shape64: (4xi64) <- (-1x24x24x512xf32) + shape64_54 = paddle._C_ops.shape64(reshape_193) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_93 = paddle._C_ops.slice( + shape64_54, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_54 + + # pd_op.roll: (-1x24x24x512xf32) <- (-1x24x24x512xf32, 2xi64) + roll_12 = paddle._C_ops.roll(reshape_193, full_int_array_11, [1, 2]) + del reshape_193 + + # pd_op.shape64: (4xi64) <- (-1x24x24x512xf32) + shape64_55 = paddle._C_ops.shape64(roll_12) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_94 = paddle._C_ops.slice( + shape64_55, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_55 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_86 = [slice_94, full_29, full_3, full_29, full_3, full_18] + del slice_94 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_84 = paddle._C_ops.stack(combine_86, 0) + del combine_86 + + # pd_op.reshape: (-1x2x12x2x12x512xf32) <- (-1x24x24x512xf32, 6xi64) + reshape_194 = paddle._C_ops.reshape(roll_12, stack_84) + del roll_12, stack_84 + + # pd_op.transpose: (-1x2x2x12x12x512xf32) <- (-1x2x12x2x12x512xf32) + transpose_85 = paddle._C_ops.transpose(reshape_194, [0, 1, 3, 2, 4, 5]) + del reshape_194 + + # pd_op.reshape: (-1x12x12x512xf32) <- (-1x2x2x12x12x512xf32, 4xi64) + reshape_195 = paddle._C_ops.reshape(transpose_85, full_int_array_38) + del transpose_85 + + # pd_op.reshape: (-1x144x512xf32) <- (-1x12x12x512xf32, 3xi64) + reshape_196 = paddle._C_ops.reshape(reshape_195, full_int_array_39) + del reshape_195 + + # pd_op.full: (1x24x24x1xf32) <- () + full_38 = paddle._C_ops.full( + [1, 24, 24, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__60 = paddle._C_ops.set_value_( + full_38, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_38 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__61 = paddle._C_ops.set_value_( + set_value__60, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__60 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__62 = paddle._C_ops.set_value_( + set_value__61, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__61 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__63 = paddle._C_ops.set_value_( + set_value__62, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__62 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__64 = paddle._C_ops.set_value_( + set_value__63, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__63 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__65 = paddle._C_ops.set_value_( + set_value__64, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__64 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__66 = paddle._C_ops.set_value_( + set_value__65, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__65 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__67 = paddle._C_ops.set_value_( + set_value__66, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__66 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__6 = paddle._C_ops.set_value_( + set_value__67, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__67 + + # pd_op.reshape: (1x2x12x2x12x1xf32) <- (1x24x24x1xf32, 6xi64) + reshape_197 = paddle._C_ops.reshape(set_value__6, full_int_array_42) + + # pd_op.transpose: (1x2x2x12x12x1xf32) <- (1x2x12x2x12x1xf32) + transpose_86 = paddle._C_ops.transpose(reshape_197, [0, 1, 3, 2, 4, 5]) + del reshape_197 + + # pd_op.reshape: (4x12x12x1xf32) <- (1x2x2x12x12x1xf32, 4xi64) + reshape_198 = paddle._C_ops.reshape(transpose_86, full_int_array_27) + del transpose_86 + + # pd_op.reshape: (4x144xf32) <- (4x12x12x1xf32, 2xi64) + reshape_199 = paddle._C_ops.reshape(reshape_198, full_int_array_28) + del reshape_198 + + # pd_op.unsqueeze: (4x1x144xf32) <- (4x144xf32, 1xi64) + unsqueeze_37 = paddle._C_ops.unsqueeze(reshape_199, full_int_array_1) + + # pd_op.unsqueeze: (4x144x1xf32) <- (4x144xf32, 1xi64) + unsqueeze_38 = paddle._C_ops.unsqueeze(reshape_199, full_int_array_5) + del reshape_199 + + # pd_op.subtract: (4x144x144xf32) <- (4x1x144xf32, 4x144x1xf32) + subtract_6 = paddle._C_ops.subtract(unsqueeze_37, unsqueeze_38) + del unsqueeze_37, unsqueeze_38 + + # pd_op.not_equal: (4x144x144xb) <- (4x144x144xf32, xf32) + not_equal_6 = paddle._C_ops.not_equal(subtract_6, full_11) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_12 = paddle._C_ops.where(not_equal_6, full_32, subtract_6) + del not_equal_6, subtract_6 + + # pd_op.equal: (4x144x144xb) <- (4x144x144xf32, xf32) + equal_6 = paddle._C_ops.equal(where_12, full_11) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_13 = paddle._C_ops.where(equal_6, full_33, where_12) + del equal_6, where_12 + + # pd_op.shape64: (3xi64) <- (-1x144x512xf32) + shape64_56 = paddle._C_ops.shape64(reshape_196) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_95 = paddle._C_ops.slice( + shape64_56, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_56 + + # pd_op.matmul: (-1x144x1536xf32) <- (-1x144x512xf32, 512x1536xf32) + matmul_80 = paddle._C_ops.matmul(reshape_196, parameter_136, False, False) + del parameter_136, reshape_196 + + # pd_op.add: (-1x144x1536xf32) <- (-1x144x1536xf32, 1536xf32) + add_99 = paddle._C_ops.add(matmul_80, parameter_135) + del matmul_80, parameter_135 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_87 = [slice_95, full_4, full_5, full_26, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_85 = paddle._C_ops.stack(combine_87, 0) + del combine_87 + + # pd_op.reshape: (-1x144x3x16x32xf32) <- (-1x144x1536xf32, 5xi64) + reshape_200 = paddle._C_ops.reshape(add_99, stack_85) + del add_99, stack_85 + + # pd_op.transpose: (3x-1x16x144x32xf32) <- (-1x144x3x16x32xf32) + transpose_87 = paddle._C_ops.transpose(reshape_200, [2, 0, 3, 1, 4]) + del reshape_200 + + # pd_op.slice: (-1x16x144x32xf32) <- (3x-1x16x144x32xf32, 1xi64, 1xi64) + slice_96 = paddle._C_ops.slice( + transpose_87, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x16x144x32xf32) <- (3x-1x16x144x32xf32, 1xi64, 1xi64) + slice_97 = paddle._C_ops.slice( + transpose_87, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x16x144x32xf32) <- (3x-1x16x144x32xf32, 1xi64, 1xi64) + slice_98 = paddle._C_ops.slice( + transpose_87, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_87 + + # pd_op.scale: (-1x16x144x32xf32) <- (-1x16x144x32xf32, 1xf32) + scale_13 = paddle._C_ops.scale(slice_96, full_8, float("0"), True) + del slice_96 + + # pd_op.transpose: (-1x16x32x144xf32) <- (-1x16x144x32xf32) + transpose_88 = paddle._C_ops.transpose(slice_97, [0, 1, 3, 2]) + del slice_97 + + # pd_op.matmul: (-1x16x144x144xf32) <- (-1x16x144x32xf32, -1x16x32x144xf32) + matmul_81 = paddle._C_ops.matmul(scale_13, transpose_88, False, False) + del scale_13, transpose_88 + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_201 = paddle._C_ops.reshape(data_27, full_int_array_7) + del data_27 + + # pd_op.index_select: (20736x16xf32) <- (529x16xf32, 20736xi64) + index_select_13 = paddle._C_ops.index_select(data_28, reshape_201, 0) + del data_28, reshape_201 + + # pd_op.reshape: (144x144x16xf32) <- (20736x16xf32, 3xi64) + reshape_202 = paddle._C_ops.reshape(index_select_13, full_int_array_8) + del index_select_13 + + # pd_op.transpose: (16x144x144xf32) <- (144x144x16xf32) + transpose_89 = paddle._C_ops.transpose(reshape_202, [2, 0, 1]) + del reshape_202 + + # pd_op.unsqueeze: (1x16x144x144xf32) <- (16x144x144xf32, 1xi64) + unsqueeze_39 = paddle._C_ops.unsqueeze(transpose_89, full_int_array_0) + del transpose_89 + + # pd_op.add: (-1x16x144x144xf32) <- (-1x16x144x144xf32, 1x16x144x144xf32) + add_100 = paddle._C_ops.add(matmul_81, unsqueeze_39) + del matmul_81, unsqueeze_39 + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_6 = paddle._C_ops.floor_divide(slice_95, full_34) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_88 = [floor_divide_6, full_6, full_26, full_4, full_4] + del floor_divide_6 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_86 = paddle._C_ops.stack(combine_88, 0) + del combine_88 + + # pd_op.reshape: (-1x4x16x144x144xf32) <- (-1x16x144x144xf32, 5xi64) + reshape_203 = paddle._C_ops.reshape(add_100, stack_86) + del add_100, stack_86 + + # pd_op.unsqueeze: (4x1x144x144xf32) <- (4x144x144xf32, 1xi64) + unsqueeze_40 = paddle._C_ops.unsqueeze(where_13, full_int_array_1) + del where_13 + + # pd_op.unsqueeze: (1x4x1x144x144xf32) <- (4x1x144x144xf32, 1xi64) + unsqueeze_41 = paddle._C_ops.unsqueeze(unsqueeze_40, full_int_array_0) + del unsqueeze_40 + + # pd_op.add: (-1x4x16x144x144xf32) <- (-1x4x16x144x144xf32, 1x4x1x144x144xf32) + add_101 = paddle._C_ops.add(reshape_203, unsqueeze_41) + del reshape_203, unsqueeze_41 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_89 = [slice_95, full_26, full_4, full_4] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_87 = paddle._C_ops.stack(combine_89, 0) + del combine_89 + + # pd_op.reshape: (-1x16x144x144xf32) <- (-1x4x16x144x144xf32, 4xi64) + reshape_204 = paddle._C_ops.reshape(add_101, stack_87) + del add_101, stack_87 + + # pd_op.softmax: (-1x16x144x144xf32) <- (-1x16x144x144xf32) + softmax_13 = paddle._C_ops.softmax(reshape_204, -1) + del reshape_204 + + # pd_op.matmul: (-1x16x144x32xf32) <- (-1x16x144x144xf32, -1x16x144x32xf32) + matmul_82 = paddle._C_ops.matmul(softmax_13, slice_98, False, False) + del slice_98, softmax_13 + + # pd_op.transpose: (-1x144x16x32xf32) <- (-1x16x144x32xf32) + transpose_90 = paddle._C_ops.transpose(matmul_82, [0, 2, 1, 3]) + del matmul_82 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_90 = [slice_95, full_4, full_18] + del slice_95 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_88 = paddle._C_ops.stack(combine_90, 0) + del combine_90 + + # pd_op.reshape: (-1x144x512xf32) <- (-1x144x16x32xf32, 3xi64) + reshape_205 = paddle._C_ops.reshape(transpose_90, stack_88) + del stack_88, transpose_90 + + # pd_op.matmul: (-1x144x512xf32) <- (-1x144x512xf32, 512x512xf32) + matmul_83 = paddle._C_ops.matmul(reshape_205, parameter_134, False, False) + del parameter_134, reshape_205 + + # pd_op.add: (-1x144x512xf32) <- (-1x144x512xf32, 512xf32) + add_102 = paddle._C_ops.add(matmul_83, parameter_133) + del matmul_83, parameter_133 + + # pd_op.reshape: (-1x12x12x512xf32) <- (-1x144x512xf32, 4xi64) + reshape_206 = paddle._C_ops.reshape(add_102, full_int_array_38) + del add_102 + + # pd_op.reshape: (-1x2x2x12x12x512xf32) <- (-1x12x12x512xf32, 6xi64) + reshape_207 = paddle._C_ops.reshape(reshape_206, full_int_array_40) + del reshape_206 + + # pd_op.transpose: (-1x2x12x2x12x512xf32) <- (-1x2x2x12x12x512xf32) + transpose_91 = paddle._C_ops.transpose(reshape_207, [0, 1, 3, 2, 4, 5]) + del reshape_207 + + # pd_op.reshape: (-1x24x24x512xf32) <- (-1x2x12x2x12x512xf32, 4xi64) + reshape_208 = paddle._C_ops.reshape(transpose_91, full_int_array_41) + del transpose_91 + + # pd_op.roll: (-1x24x24x512xf32) <- (-1x24x24x512xf32, 2xi64) + roll_13 = paddle._C_ops.roll(reshape_208, full_int_array_29, [1, 2]) + del reshape_208 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_91 = [slice_92, full_30, full_18] + del slice_92 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_89 = paddle._C_ops.stack(combine_91, 0) + del combine_91 + + # pd_op.reshape: (-1x576x512xf32) <- (-1x24x24x512xf32, 3xi64) + reshape_209 = paddle._C_ops.reshape(roll_13, stack_89) + del roll_13, stack_89 + + # pd_op.add: (-1x576x512xf32) <- (-1x576x512xf32, -1x576x512xf32) + add_103 = paddle._C_ops.add(add_98, reshape_209) + del add_98, reshape_209 + + # pd_op.layer_norm: (-1x576x512xf32, -1x576xf32, -1x576xf32) <- (-1x576x512xf32, 512xf32, 512xf32) + layer_norm_90, layer_norm_91, layer_norm_92 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_103, parameter_132, parameter_131, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_131, parameter_132 + + # pd_op.matmul: (-1x576x2048xf32) <- (-1x576x512xf32, 512x2048xf32) + matmul_84 = paddle._C_ops.matmul(layer_norm_90, parameter_130, False, False) + del layer_norm_90, parameter_130 + + # pd_op.add: (-1x576x2048xf32) <- (-1x576x2048xf32, 2048xf32) + add_104 = paddle._C_ops.add(matmul_84, parameter_129) + del matmul_84, parameter_129 + + # pd_op.gelu: (-1x576x2048xf32) <- (-1x576x2048xf32) + gelu_13 = paddle._C_ops.gelu(add_104, False) + del add_104 + + # pd_op.matmul: (-1x576x512xf32) <- (-1x576x2048xf32, 2048x512xf32) + matmul_85 = paddle._C_ops.matmul(gelu_13, parameter_128, False, False) + del gelu_13, parameter_128 + + # pd_op.add: (-1x576x512xf32) <- (-1x576x512xf32, 512xf32) + add_105 = paddle._C_ops.add(matmul_85, parameter_127) + del matmul_85, parameter_127 + + # pd_op.add: (-1x576x512xf32) <- (-1x576x512xf32, -1x576x512xf32) + add_106 = paddle._C_ops.add(add_103, add_105) + del add_103, add_105 + + # pd_op.shape64: (3xi64) <- (-1x576x512xf32) + shape64_57 = paddle._C_ops.shape64(add_106) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_99 = paddle._C_ops.slice( + shape64_57, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_57 + + # pd_op.layer_norm: (-1x576x512xf32, -1x576xf32, -1x576xf32) <- (-1x576x512xf32, 512xf32, 512xf32) + layer_norm_93, layer_norm_94, layer_norm_95 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_106, parameter_126, parameter_125, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_125, parameter_126 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_92 = [slice_99, full_28, full_28, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_90 = paddle._C_ops.stack(combine_92, 0) + del combine_92 + + # pd_op.reshape: (-1x24x24x512xf32) <- (-1x576x512xf32, 4xi64) + reshape_210 = paddle._C_ops.reshape(layer_norm_93, stack_90) + del layer_norm_93, stack_90 + + # pd_op.shape64: (4xi64) <- (-1x24x24x512xf32) + shape64_58 = paddle._C_ops.shape64(reshape_210) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_100 = paddle._C_ops.slice( + shape64_58, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_58 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_93 = [slice_100, full_29, full_3, full_29, full_3, full_18] + del slice_100 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_91 = paddle._C_ops.stack(combine_93, 0) + del combine_93 + + # pd_op.reshape: (-1x2x12x2x12x512xf32) <- (-1x24x24x512xf32, 6xi64) + reshape_211 = paddle._C_ops.reshape(reshape_210, stack_91) + del reshape_210, stack_91 + + # pd_op.transpose: (-1x2x2x12x12x512xf32) <- (-1x2x12x2x12x512xf32) + transpose_92 = paddle._C_ops.transpose(reshape_211, [0, 1, 3, 2, 4, 5]) + del reshape_211 + + # pd_op.reshape: (-1x12x12x512xf32) <- (-1x2x2x12x12x512xf32, 4xi64) + reshape_212 = paddle._C_ops.reshape(transpose_92, full_int_array_38) + del transpose_92 + + # pd_op.reshape: (-1x144x512xf32) <- (-1x12x12x512xf32, 3xi64) + reshape_213 = paddle._C_ops.reshape(reshape_212, full_int_array_39) + del reshape_212 + + # pd_op.shape64: (3xi64) <- (-1x144x512xf32) + shape64_59 = paddle._C_ops.shape64(reshape_213) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_101 = paddle._C_ops.slice( + shape64_59, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_59 + + # pd_op.matmul: (-1x144x1536xf32) <- (-1x144x512xf32, 512x1536xf32) + matmul_86 = paddle._C_ops.matmul(reshape_213, parameter_124, False, False) + del parameter_124, reshape_213 + + # pd_op.add: (-1x144x1536xf32) <- (-1x144x1536xf32, 1536xf32) + add_107 = paddle._C_ops.add(matmul_86, parameter_123) + del matmul_86, parameter_123 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_94 = [slice_101, full_4, full_5, full_26, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_92 = paddle._C_ops.stack(combine_94, 0) + del combine_94 + + # pd_op.reshape: (-1x144x3x16x32xf32) <- (-1x144x1536xf32, 5xi64) + reshape_214 = paddle._C_ops.reshape(add_107, stack_92) + del add_107, stack_92 + + # pd_op.transpose: (3x-1x16x144x32xf32) <- (-1x144x3x16x32xf32) + transpose_93 = paddle._C_ops.transpose(reshape_214, [2, 0, 3, 1, 4]) + del reshape_214 + + # pd_op.slice: (-1x16x144x32xf32) <- (3x-1x16x144x32xf32, 1xi64, 1xi64) + slice_102 = paddle._C_ops.slice( + transpose_93, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x16x144x32xf32) <- (3x-1x16x144x32xf32, 1xi64, 1xi64) + slice_103 = paddle._C_ops.slice( + transpose_93, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x16x144x32xf32) <- (3x-1x16x144x32xf32, 1xi64, 1xi64) + slice_104 = paddle._C_ops.slice( + transpose_93, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_93 + + # pd_op.scale: (-1x16x144x32xf32) <- (-1x16x144x32xf32, 1xf32) + scale_14 = paddle._C_ops.scale(slice_102, full_8, float("0"), True) + del slice_102 + + # pd_op.transpose: (-1x16x32x144xf32) <- (-1x16x144x32xf32) + transpose_94 = paddle._C_ops.transpose(slice_103, [0, 1, 3, 2]) + del slice_103 + + # pd_op.matmul: (-1x16x144x144xf32) <- (-1x16x144x32xf32, -1x16x32x144xf32) + matmul_87 = paddle._C_ops.matmul(scale_14, transpose_94, False, False) + del scale_14, transpose_94 + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_215 = paddle._C_ops.reshape(data_29, full_int_array_7) + del data_29 + + # pd_op.index_select: (20736x16xf32) <- (529x16xf32, 20736xi64) + index_select_14 = paddle._C_ops.index_select(data_30, reshape_215, 0) + del data_30, reshape_215 + + # pd_op.reshape: (144x144x16xf32) <- (20736x16xf32, 3xi64) + reshape_216 = paddle._C_ops.reshape(index_select_14, full_int_array_8) + del index_select_14 + + # pd_op.transpose: (16x144x144xf32) <- (144x144x16xf32) + transpose_95 = paddle._C_ops.transpose(reshape_216, [2, 0, 1]) + del reshape_216 + + # pd_op.unsqueeze: (1x16x144x144xf32) <- (16x144x144xf32, 1xi64) + unsqueeze_42 = paddle._C_ops.unsqueeze(transpose_95, full_int_array_0) + del transpose_95 + + # pd_op.add: (-1x16x144x144xf32) <- (-1x16x144x144xf32, 1x16x144x144xf32) + add_108 = paddle._C_ops.add(matmul_87, unsqueeze_42) + del matmul_87, unsqueeze_42 + + # pd_op.softmax: (-1x16x144x144xf32) <- (-1x16x144x144xf32) + softmax_14 = paddle._C_ops.softmax(add_108, -1) + del add_108 + + # pd_op.matmul: (-1x16x144x32xf32) <- (-1x16x144x144xf32, -1x16x144x32xf32) + matmul_88 = paddle._C_ops.matmul(softmax_14, slice_104, False, False) + del slice_104, softmax_14 + + # pd_op.transpose: (-1x144x16x32xf32) <- (-1x16x144x32xf32) + transpose_96 = paddle._C_ops.transpose(matmul_88, [0, 2, 1, 3]) + del matmul_88 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_95 = [slice_101, full_4, full_18] + del slice_101 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_93 = paddle._C_ops.stack(combine_95, 0) + del combine_95 + + # pd_op.reshape: (-1x144x512xf32) <- (-1x144x16x32xf32, 3xi64) + reshape_217 = paddle._C_ops.reshape(transpose_96, stack_93) + del stack_93, transpose_96 + + # pd_op.matmul: (-1x144x512xf32) <- (-1x144x512xf32, 512x512xf32) + matmul_89 = paddle._C_ops.matmul(reshape_217, parameter_122, False, False) + del parameter_122, reshape_217 + + # pd_op.add: (-1x144x512xf32) <- (-1x144x512xf32, 512xf32) + add_109 = paddle._C_ops.add(matmul_89, parameter_121) + del matmul_89, parameter_121 + + # pd_op.reshape: (-1x12x12x512xf32) <- (-1x144x512xf32, 4xi64) + reshape_218 = paddle._C_ops.reshape(add_109, full_int_array_38) + del add_109 + + # pd_op.reshape: (-1x2x2x12x12x512xf32) <- (-1x12x12x512xf32, 6xi64) + reshape_219 = paddle._C_ops.reshape(reshape_218, full_int_array_40) + del reshape_218 + + # pd_op.transpose: (-1x2x12x2x12x512xf32) <- (-1x2x2x12x12x512xf32) + transpose_97 = paddle._C_ops.transpose(reshape_219, [0, 1, 3, 2, 4, 5]) + del reshape_219 + + # pd_op.reshape: (-1x24x24x512xf32) <- (-1x2x12x2x12x512xf32, 4xi64) + reshape_220 = paddle._C_ops.reshape(transpose_97, full_int_array_41) + del transpose_97 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_96 = [slice_99, full_30, full_18] + del slice_99 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_94 = paddle._C_ops.stack(combine_96, 0) + del combine_96 + + # pd_op.reshape: (-1x576x512xf32) <- (-1x24x24x512xf32, 3xi64) + reshape_221 = paddle._C_ops.reshape(reshape_220, stack_94) + del reshape_220, stack_94 + + # pd_op.add: (-1x576x512xf32) <- (-1x576x512xf32, -1x576x512xf32) + add_110 = paddle._C_ops.add(add_106, reshape_221) + del add_106, reshape_221 + + # pd_op.layer_norm: (-1x576x512xf32, -1x576xf32, -1x576xf32) <- (-1x576x512xf32, 512xf32, 512xf32) + layer_norm_96, layer_norm_97, layer_norm_98 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_110, parameter_120, parameter_119, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_119, parameter_120 + + # pd_op.matmul: (-1x576x2048xf32) <- (-1x576x512xf32, 512x2048xf32) + matmul_90 = paddle._C_ops.matmul(layer_norm_96, parameter_118, False, False) + del layer_norm_96, parameter_118 + + # pd_op.add: (-1x576x2048xf32) <- (-1x576x2048xf32, 2048xf32) + add_111 = paddle._C_ops.add(matmul_90, parameter_117) + del matmul_90, parameter_117 + + # pd_op.gelu: (-1x576x2048xf32) <- (-1x576x2048xf32) + gelu_14 = paddle._C_ops.gelu(add_111, False) + del add_111 + + # pd_op.matmul: (-1x576x512xf32) <- (-1x576x2048xf32, 2048x512xf32) + matmul_91 = paddle._C_ops.matmul(gelu_14, parameter_116, False, False) + del gelu_14, parameter_116 + + # pd_op.add: (-1x576x512xf32) <- (-1x576x512xf32, 512xf32) + add_112 = paddle._C_ops.add(matmul_91, parameter_115) + del matmul_91, parameter_115 + + # pd_op.add: (-1x576x512xf32) <- (-1x576x512xf32, -1x576x512xf32) + add_113 = paddle._C_ops.add(add_110, add_112) + del add_110, add_112 + + # pd_op.shape64: (3xi64) <- (-1x576x512xf32) + shape64_60 = paddle._C_ops.shape64(add_113) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_105 = paddle._C_ops.slice( + shape64_60, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_60 + + # pd_op.layer_norm: (-1x576x512xf32, -1x576xf32, -1x576xf32) <- (-1x576x512xf32, 512xf32, 512xf32) + layer_norm_99, layer_norm_100, layer_norm_101 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_113, parameter_114, parameter_113, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_113, parameter_114 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_97 = [slice_105, full_28, full_28, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_95 = paddle._C_ops.stack(combine_97, 0) + del combine_97 + + # pd_op.reshape: (-1x24x24x512xf32) <- (-1x576x512xf32, 4xi64) + reshape_222 = paddle._C_ops.reshape(layer_norm_99, stack_95) + del layer_norm_99, stack_95 + + # pd_op.shape64: (4xi64) <- (-1x24x24x512xf32) + shape64_61 = paddle._C_ops.shape64(reshape_222) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_106 = paddle._C_ops.slice( + shape64_61, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_61 + + # pd_op.roll: (-1x24x24x512xf32) <- (-1x24x24x512xf32, 2xi64) + roll_14 = paddle._C_ops.roll(reshape_222, full_int_array_11, [1, 2]) + del reshape_222 + + # pd_op.shape64: (4xi64) <- (-1x24x24x512xf32) + shape64_62 = paddle._C_ops.shape64(roll_14) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_107 = paddle._C_ops.slice( + shape64_62, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_62 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_98 = [slice_107, full_29, full_3, full_29, full_3, full_18] + del slice_107 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_96 = paddle._C_ops.stack(combine_98, 0) + del combine_98 + + # pd_op.reshape: (-1x2x12x2x12x512xf32) <- (-1x24x24x512xf32, 6xi64) + reshape_223 = paddle._C_ops.reshape(roll_14, stack_96) + del roll_14, stack_96 + + # pd_op.transpose: (-1x2x2x12x12x512xf32) <- (-1x2x12x2x12x512xf32) + transpose_98 = paddle._C_ops.transpose(reshape_223, [0, 1, 3, 2, 4, 5]) + del reshape_223 + + # pd_op.reshape: (-1x12x12x512xf32) <- (-1x2x2x12x12x512xf32, 4xi64) + reshape_224 = paddle._C_ops.reshape(transpose_98, full_int_array_38) + del transpose_98 + + # pd_op.reshape: (-1x144x512xf32) <- (-1x12x12x512xf32, 3xi64) + reshape_225 = paddle._C_ops.reshape(reshape_224, full_int_array_39) + del reshape_224 + + # pd_op.full: (1x24x24x1xf32) <- () + full_39 = paddle._C_ops.full( + [1, 24, 24, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__68 = paddle._C_ops.set_value_( + full_39, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_39 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__69 = paddle._C_ops.set_value_( + set_value__68, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__68 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__70 = paddle._C_ops.set_value_( + set_value__69, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__69 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__71 = paddle._C_ops.set_value_( + set_value__70, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__70 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__72 = paddle._C_ops.set_value_( + set_value__71, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__71 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__73 = paddle._C_ops.set_value_( + set_value__72, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__72 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__74 = paddle._C_ops.set_value_( + set_value__73, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__73 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__75 = paddle._C_ops.set_value_( + set_value__74, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__74 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__7 = paddle._C_ops.set_value_( + set_value__75, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__75 + + # pd_op.reshape: (1x2x12x2x12x1xf32) <- (1x24x24x1xf32, 6xi64) + reshape_226 = paddle._C_ops.reshape(set_value__7, full_int_array_42) + + # pd_op.transpose: (1x2x2x12x12x1xf32) <- (1x2x12x2x12x1xf32) + transpose_99 = paddle._C_ops.transpose(reshape_226, [0, 1, 3, 2, 4, 5]) + del reshape_226 + + # pd_op.reshape: (4x12x12x1xf32) <- (1x2x2x12x12x1xf32, 4xi64) + reshape_227 = paddle._C_ops.reshape(transpose_99, full_int_array_27) + del transpose_99 + + # pd_op.reshape: (4x144xf32) <- (4x12x12x1xf32, 2xi64) + reshape_228 = paddle._C_ops.reshape(reshape_227, full_int_array_28) + del reshape_227 + + # pd_op.unsqueeze: (4x1x144xf32) <- (4x144xf32, 1xi64) + unsqueeze_43 = paddle._C_ops.unsqueeze(reshape_228, full_int_array_1) + + # pd_op.unsqueeze: (4x144x1xf32) <- (4x144xf32, 1xi64) + unsqueeze_44 = paddle._C_ops.unsqueeze(reshape_228, full_int_array_5) + del reshape_228 + + # pd_op.subtract: (4x144x144xf32) <- (4x1x144xf32, 4x144x1xf32) + subtract_7 = paddle._C_ops.subtract(unsqueeze_43, unsqueeze_44) + del unsqueeze_43, unsqueeze_44 + + # pd_op.not_equal: (4x144x144xb) <- (4x144x144xf32, xf32) + not_equal_7 = paddle._C_ops.not_equal(subtract_7, full_11) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_14 = paddle._C_ops.where(not_equal_7, full_32, subtract_7) + del not_equal_7, subtract_7 + + # pd_op.equal: (4x144x144xb) <- (4x144x144xf32, xf32) + equal_7 = paddle._C_ops.equal(where_14, full_11) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_15 = paddle._C_ops.where(equal_7, full_33, where_14) + del equal_7, where_14 + + # pd_op.shape64: (3xi64) <- (-1x144x512xf32) + shape64_63 = paddle._C_ops.shape64(reshape_225) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_108 = paddle._C_ops.slice( + shape64_63, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_63 + + # pd_op.matmul: (-1x144x1536xf32) <- (-1x144x512xf32, 512x1536xf32) + matmul_92 = paddle._C_ops.matmul(reshape_225, parameter_112, False, False) + del parameter_112, reshape_225 + + # pd_op.add: (-1x144x1536xf32) <- (-1x144x1536xf32, 1536xf32) + add_114 = paddle._C_ops.add(matmul_92, parameter_111) + del matmul_92, parameter_111 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_99 = [slice_108, full_4, full_5, full_26, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_97 = paddle._C_ops.stack(combine_99, 0) + del combine_99 + + # pd_op.reshape: (-1x144x3x16x32xf32) <- (-1x144x1536xf32, 5xi64) + reshape_229 = paddle._C_ops.reshape(add_114, stack_97) + del add_114, stack_97 + + # pd_op.transpose: (3x-1x16x144x32xf32) <- (-1x144x3x16x32xf32) + transpose_100 = paddle._C_ops.transpose(reshape_229, [2, 0, 3, 1, 4]) + del reshape_229 + + # pd_op.slice: (-1x16x144x32xf32) <- (3x-1x16x144x32xf32, 1xi64, 1xi64) + slice_109 = paddle._C_ops.slice( + transpose_100, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x16x144x32xf32) <- (3x-1x16x144x32xf32, 1xi64, 1xi64) + slice_110 = paddle._C_ops.slice( + transpose_100, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x16x144x32xf32) <- (3x-1x16x144x32xf32, 1xi64, 1xi64) + slice_111 = paddle._C_ops.slice( + transpose_100, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_100 + + # pd_op.scale: (-1x16x144x32xf32) <- (-1x16x144x32xf32, 1xf32) + scale_15 = paddle._C_ops.scale(slice_109, full_8, float("0"), True) + del slice_109 + + # pd_op.transpose: (-1x16x32x144xf32) <- (-1x16x144x32xf32) + transpose_101 = paddle._C_ops.transpose(slice_110, [0, 1, 3, 2]) + del slice_110 + + # pd_op.matmul: (-1x16x144x144xf32) <- (-1x16x144x32xf32, -1x16x32x144xf32) + matmul_93 = paddle._C_ops.matmul(scale_15, transpose_101, False, False) + del scale_15, transpose_101 + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_230 = paddle._C_ops.reshape(data_31, full_int_array_7) + del data_31 + + # pd_op.index_select: (20736x16xf32) <- (529x16xf32, 20736xi64) + index_select_15 = paddle._C_ops.index_select(data_32, reshape_230, 0) + del data_32, reshape_230 + + # pd_op.reshape: (144x144x16xf32) <- (20736x16xf32, 3xi64) + reshape_231 = paddle._C_ops.reshape(index_select_15, full_int_array_8) + del index_select_15 + + # pd_op.transpose: (16x144x144xf32) <- (144x144x16xf32) + transpose_102 = paddle._C_ops.transpose(reshape_231, [2, 0, 1]) + del reshape_231 + + # pd_op.unsqueeze: (1x16x144x144xf32) <- (16x144x144xf32, 1xi64) + unsqueeze_45 = paddle._C_ops.unsqueeze(transpose_102, full_int_array_0) + del transpose_102 + + # pd_op.add: (-1x16x144x144xf32) <- (-1x16x144x144xf32, 1x16x144x144xf32) + add_115 = paddle._C_ops.add(matmul_93, unsqueeze_45) + del matmul_93, unsqueeze_45 + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_7 = paddle._C_ops.floor_divide(slice_108, full_34) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_100 = [floor_divide_7, full_6, full_26, full_4, full_4] + del floor_divide_7 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_98 = paddle._C_ops.stack(combine_100, 0) + del combine_100 + + # pd_op.reshape: (-1x4x16x144x144xf32) <- (-1x16x144x144xf32, 5xi64) + reshape_232 = paddle._C_ops.reshape(add_115, stack_98) + del add_115, stack_98 + + # pd_op.unsqueeze: (4x1x144x144xf32) <- (4x144x144xf32, 1xi64) + unsqueeze_46 = paddle._C_ops.unsqueeze(where_15, full_int_array_1) + del where_15 + + # pd_op.unsqueeze: (1x4x1x144x144xf32) <- (4x1x144x144xf32, 1xi64) + unsqueeze_47 = paddle._C_ops.unsqueeze(unsqueeze_46, full_int_array_0) + del unsqueeze_46 + + # pd_op.add: (-1x4x16x144x144xf32) <- (-1x4x16x144x144xf32, 1x4x1x144x144xf32) + add_116 = paddle._C_ops.add(reshape_232, unsqueeze_47) + del reshape_232, unsqueeze_47 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_101 = [slice_108, full_26, full_4, full_4] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_99 = paddle._C_ops.stack(combine_101, 0) + del combine_101 + + # pd_op.reshape: (-1x16x144x144xf32) <- (-1x4x16x144x144xf32, 4xi64) + reshape_233 = paddle._C_ops.reshape(add_116, stack_99) + del add_116, stack_99 + + # pd_op.softmax: (-1x16x144x144xf32) <- (-1x16x144x144xf32) + softmax_15 = paddle._C_ops.softmax(reshape_233, -1) + del reshape_233 + + # pd_op.matmul: (-1x16x144x32xf32) <- (-1x16x144x144xf32, -1x16x144x32xf32) + matmul_94 = paddle._C_ops.matmul(softmax_15, slice_111, False, False) + del slice_111, softmax_15 + + # pd_op.transpose: (-1x144x16x32xf32) <- (-1x16x144x32xf32) + transpose_103 = paddle._C_ops.transpose(matmul_94, [0, 2, 1, 3]) + del matmul_94 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_102 = [slice_108, full_4, full_18] + del slice_108 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_100 = paddle._C_ops.stack(combine_102, 0) + del combine_102 + + # pd_op.reshape: (-1x144x512xf32) <- (-1x144x16x32xf32, 3xi64) + reshape_234 = paddle._C_ops.reshape(transpose_103, stack_100) + del stack_100, transpose_103 + + # pd_op.matmul: (-1x144x512xf32) <- (-1x144x512xf32, 512x512xf32) + matmul_95 = paddle._C_ops.matmul(reshape_234, parameter_110, False, False) + del parameter_110, reshape_234 + + # pd_op.add: (-1x144x512xf32) <- (-1x144x512xf32, 512xf32) + add_117 = paddle._C_ops.add(matmul_95, parameter_109) + del matmul_95, parameter_109 + + # pd_op.reshape: (-1x12x12x512xf32) <- (-1x144x512xf32, 4xi64) + reshape_235 = paddle._C_ops.reshape(add_117, full_int_array_38) + del add_117 + + # pd_op.reshape: (-1x2x2x12x12x512xf32) <- (-1x12x12x512xf32, 6xi64) + reshape_236 = paddle._C_ops.reshape(reshape_235, full_int_array_40) + del reshape_235 + + # pd_op.transpose: (-1x2x12x2x12x512xf32) <- (-1x2x2x12x12x512xf32) + transpose_104 = paddle._C_ops.transpose(reshape_236, [0, 1, 3, 2, 4, 5]) + del reshape_236 + + # pd_op.reshape: (-1x24x24x512xf32) <- (-1x2x12x2x12x512xf32, 4xi64) + reshape_237 = paddle._C_ops.reshape(transpose_104, full_int_array_41) + del transpose_104 + + # pd_op.roll: (-1x24x24x512xf32) <- (-1x24x24x512xf32, 2xi64) + roll_15 = paddle._C_ops.roll(reshape_237, full_int_array_29, [1, 2]) + del reshape_237 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_103 = [slice_105, full_30, full_18] + del slice_105 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_101 = paddle._C_ops.stack(combine_103, 0) + del combine_103 + + # pd_op.reshape: (-1x576x512xf32) <- (-1x24x24x512xf32, 3xi64) + reshape_238 = paddle._C_ops.reshape(roll_15, stack_101) + del roll_15, stack_101 + + # pd_op.add: (-1x576x512xf32) <- (-1x576x512xf32, -1x576x512xf32) + add_118 = paddle._C_ops.add(add_113, reshape_238) + del add_113, reshape_238 + + # pd_op.layer_norm: (-1x576x512xf32, -1x576xf32, -1x576xf32) <- (-1x576x512xf32, 512xf32, 512xf32) + layer_norm_102, layer_norm_103, layer_norm_104 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_118, parameter_108, parameter_107, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_107, parameter_108 + + # pd_op.matmul: (-1x576x2048xf32) <- (-1x576x512xf32, 512x2048xf32) + matmul_96 = paddle._C_ops.matmul(layer_norm_102, parameter_106, False, False) + del layer_norm_102, parameter_106 + + # pd_op.add: (-1x576x2048xf32) <- (-1x576x2048xf32, 2048xf32) + add_119 = paddle._C_ops.add(matmul_96, parameter_105) + del matmul_96, parameter_105 + + # pd_op.gelu: (-1x576x2048xf32) <- (-1x576x2048xf32) + gelu_15 = paddle._C_ops.gelu(add_119, False) + del add_119 + + # pd_op.matmul: (-1x576x512xf32) <- (-1x576x2048xf32, 2048x512xf32) + matmul_97 = paddle._C_ops.matmul(gelu_15, parameter_104, False, False) + del gelu_15, parameter_104 + + # pd_op.add: (-1x576x512xf32) <- (-1x576x512xf32, 512xf32) + add_120 = paddle._C_ops.add(matmul_97, parameter_103) + del matmul_97, parameter_103 + + # pd_op.add: (-1x576x512xf32) <- (-1x576x512xf32, -1x576x512xf32) + add_121 = paddle._C_ops.add(add_118, add_120) + del add_118, add_120 + + # pd_op.shape64: (3xi64) <- (-1x576x512xf32) + shape64_64 = paddle._C_ops.shape64(add_121) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_112 = paddle._C_ops.slice( + shape64_64, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_64 + + # pd_op.layer_norm: (-1x576x512xf32, -1x576xf32, -1x576xf32) <- (-1x576x512xf32, 512xf32, 512xf32) + layer_norm_105, layer_norm_106, layer_norm_107 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_121, parameter_102, parameter_101, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_101, parameter_102 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_104 = [slice_112, full_28, full_28, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_102 = paddle._C_ops.stack(combine_104, 0) + del combine_104 + + # pd_op.reshape: (-1x24x24x512xf32) <- (-1x576x512xf32, 4xi64) + reshape_239 = paddle._C_ops.reshape(layer_norm_105, stack_102) + del layer_norm_105, stack_102 + + # pd_op.shape64: (4xi64) <- (-1x24x24x512xf32) + shape64_65 = paddle._C_ops.shape64(reshape_239) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_113 = paddle._C_ops.slice( + shape64_65, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_65 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_105 = [slice_113, full_29, full_3, full_29, full_3, full_18] + del slice_113 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_103 = paddle._C_ops.stack(combine_105, 0) + del combine_105 + + # pd_op.reshape: (-1x2x12x2x12x512xf32) <- (-1x24x24x512xf32, 6xi64) + reshape_240 = paddle._C_ops.reshape(reshape_239, stack_103) + del reshape_239, stack_103 + + # pd_op.transpose: (-1x2x2x12x12x512xf32) <- (-1x2x12x2x12x512xf32) + transpose_105 = paddle._C_ops.transpose(reshape_240, [0, 1, 3, 2, 4, 5]) + del reshape_240 + + # pd_op.reshape: (-1x12x12x512xf32) <- (-1x2x2x12x12x512xf32, 4xi64) + reshape_241 = paddle._C_ops.reshape(transpose_105, full_int_array_38) + del transpose_105 + + # pd_op.reshape: (-1x144x512xf32) <- (-1x12x12x512xf32, 3xi64) + reshape_242 = paddle._C_ops.reshape(reshape_241, full_int_array_39) + del reshape_241 + + # pd_op.shape64: (3xi64) <- (-1x144x512xf32) + shape64_66 = paddle._C_ops.shape64(reshape_242) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_114 = paddle._C_ops.slice( + shape64_66, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_66 + + # pd_op.matmul: (-1x144x1536xf32) <- (-1x144x512xf32, 512x1536xf32) + matmul_98 = paddle._C_ops.matmul(reshape_242, parameter_100, False, False) + del parameter_100, reshape_242 + + # pd_op.add: (-1x144x1536xf32) <- (-1x144x1536xf32, 1536xf32) + add_122 = paddle._C_ops.add(matmul_98, parameter_99) + del matmul_98, parameter_99 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_106 = [slice_114, full_4, full_5, full_26, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_104 = paddle._C_ops.stack(combine_106, 0) + del combine_106 + + # pd_op.reshape: (-1x144x3x16x32xf32) <- (-1x144x1536xf32, 5xi64) + reshape_243 = paddle._C_ops.reshape(add_122, stack_104) + del add_122, stack_104 + + # pd_op.transpose: (3x-1x16x144x32xf32) <- (-1x144x3x16x32xf32) + transpose_106 = paddle._C_ops.transpose(reshape_243, [2, 0, 3, 1, 4]) + del reshape_243 + + # pd_op.slice: (-1x16x144x32xf32) <- (3x-1x16x144x32xf32, 1xi64, 1xi64) + slice_115 = paddle._C_ops.slice( + transpose_106, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x16x144x32xf32) <- (3x-1x16x144x32xf32, 1xi64, 1xi64) + slice_116 = paddle._C_ops.slice( + transpose_106, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x16x144x32xf32) <- (3x-1x16x144x32xf32, 1xi64, 1xi64) + slice_117 = paddle._C_ops.slice( + transpose_106, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_106 + + # pd_op.scale: (-1x16x144x32xf32) <- (-1x16x144x32xf32, 1xf32) + scale_16 = paddle._C_ops.scale(slice_115, full_8, float("0"), True) + del slice_115 + + # pd_op.transpose: (-1x16x32x144xf32) <- (-1x16x144x32xf32) + transpose_107 = paddle._C_ops.transpose(slice_116, [0, 1, 3, 2]) + del slice_116 + + # pd_op.matmul: (-1x16x144x144xf32) <- (-1x16x144x32xf32, -1x16x32x144xf32) + matmul_99 = paddle._C_ops.matmul(scale_16, transpose_107, False, False) + del scale_16, transpose_107 + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_244 = paddle._C_ops.reshape(data_33, full_int_array_7) + del data_33 + + # pd_op.index_select: (20736x16xf32) <- (529x16xf32, 20736xi64) + index_select_16 = paddle._C_ops.index_select(data_34, reshape_244, 0) + del data_34, reshape_244 + + # pd_op.reshape: (144x144x16xf32) <- (20736x16xf32, 3xi64) + reshape_245 = paddle._C_ops.reshape(index_select_16, full_int_array_8) + del index_select_16 + + # pd_op.transpose: (16x144x144xf32) <- (144x144x16xf32) + transpose_108 = paddle._C_ops.transpose(reshape_245, [2, 0, 1]) + del reshape_245 + + # pd_op.unsqueeze: (1x16x144x144xf32) <- (16x144x144xf32, 1xi64) + unsqueeze_48 = paddle._C_ops.unsqueeze(transpose_108, full_int_array_0) + del transpose_108 + + # pd_op.add: (-1x16x144x144xf32) <- (-1x16x144x144xf32, 1x16x144x144xf32) + add_123 = paddle._C_ops.add(matmul_99, unsqueeze_48) + del matmul_99, unsqueeze_48 + + # pd_op.softmax: (-1x16x144x144xf32) <- (-1x16x144x144xf32) + softmax_16 = paddle._C_ops.softmax(add_123, -1) + del add_123 + + # pd_op.matmul: (-1x16x144x32xf32) <- (-1x16x144x144xf32, -1x16x144x32xf32) + matmul_100 = paddle._C_ops.matmul(softmax_16, slice_117, False, False) + del slice_117, softmax_16 + + # pd_op.transpose: (-1x144x16x32xf32) <- (-1x16x144x32xf32) + transpose_109 = paddle._C_ops.transpose(matmul_100, [0, 2, 1, 3]) + del matmul_100 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_107 = [slice_114, full_4, full_18] + del slice_114 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_105 = paddle._C_ops.stack(combine_107, 0) + del combine_107 + + # pd_op.reshape: (-1x144x512xf32) <- (-1x144x16x32xf32, 3xi64) + reshape_246 = paddle._C_ops.reshape(transpose_109, stack_105) + del stack_105, transpose_109 + + # pd_op.matmul: (-1x144x512xf32) <- (-1x144x512xf32, 512x512xf32) + matmul_101 = paddle._C_ops.matmul(reshape_246, parameter_98, False, False) + del parameter_98, reshape_246 + + # pd_op.add: (-1x144x512xf32) <- (-1x144x512xf32, 512xf32) + add_124 = paddle._C_ops.add(matmul_101, parameter_97) + del matmul_101, parameter_97 + + # pd_op.reshape: (-1x12x12x512xf32) <- (-1x144x512xf32, 4xi64) + reshape_247 = paddle._C_ops.reshape(add_124, full_int_array_38) + del add_124 + + # pd_op.reshape: (-1x2x2x12x12x512xf32) <- (-1x12x12x512xf32, 6xi64) + reshape_248 = paddle._C_ops.reshape(reshape_247, full_int_array_40) + del reshape_247 + + # pd_op.transpose: (-1x2x12x2x12x512xf32) <- (-1x2x2x12x12x512xf32) + transpose_110 = paddle._C_ops.transpose(reshape_248, [0, 1, 3, 2, 4, 5]) + del reshape_248 + + # pd_op.reshape: (-1x24x24x512xf32) <- (-1x2x12x2x12x512xf32, 4xi64) + reshape_249 = paddle._C_ops.reshape(transpose_110, full_int_array_41) + del transpose_110 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_108 = [slice_112, full_30, full_18] + del slice_112 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_106 = paddle._C_ops.stack(combine_108, 0) + del combine_108 + + # pd_op.reshape: (-1x576x512xf32) <- (-1x24x24x512xf32, 3xi64) + reshape_250 = paddle._C_ops.reshape(reshape_249, stack_106) + del reshape_249, stack_106 + + # pd_op.add: (-1x576x512xf32) <- (-1x576x512xf32, -1x576x512xf32) + add_125 = paddle._C_ops.add(add_121, reshape_250) + del add_121, reshape_250 + + # pd_op.layer_norm: (-1x576x512xf32, -1x576xf32, -1x576xf32) <- (-1x576x512xf32, 512xf32, 512xf32) + layer_norm_108, layer_norm_109, layer_norm_110 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_125, parameter_96, parameter_95, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_95, parameter_96 + + # pd_op.matmul: (-1x576x2048xf32) <- (-1x576x512xf32, 512x2048xf32) + matmul_102 = paddle._C_ops.matmul(layer_norm_108, parameter_94, False, False) + del layer_norm_108, parameter_94 + + # pd_op.add: (-1x576x2048xf32) <- (-1x576x2048xf32, 2048xf32) + add_126 = paddle._C_ops.add(matmul_102, parameter_93) + del matmul_102, parameter_93 + + # pd_op.gelu: (-1x576x2048xf32) <- (-1x576x2048xf32) + gelu_16 = paddle._C_ops.gelu(add_126, False) + del add_126 + + # pd_op.matmul: (-1x576x512xf32) <- (-1x576x2048xf32, 2048x512xf32) + matmul_103 = paddle._C_ops.matmul(gelu_16, parameter_92, False, False) + del gelu_16, parameter_92 + + # pd_op.add: (-1x576x512xf32) <- (-1x576x512xf32, 512xf32) + add_127 = paddle._C_ops.add(matmul_103, parameter_91) + del matmul_103, parameter_91 + + # pd_op.add: (-1x576x512xf32) <- (-1x576x512xf32, -1x576x512xf32) + add_128 = paddle._C_ops.add(add_125, add_127) + del add_125, add_127 + + # pd_op.shape64: (3xi64) <- (-1x576x512xf32) + shape64_67 = paddle._C_ops.shape64(add_128) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_118 = paddle._C_ops.slice( + shape64_67, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_67 + + # pd_op.layer_norm: (-1x576x512xf32, -1x576xf32, -1x576xf32) <- (-1x576x512xf32, 512xf32, 512xf32) + layer_norm_111, layer_norm_112, layer_norm_113 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_128, parameter_90, parameter_89, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_89, parameter_90 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_109 = [slice_118, full_28, full_28, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_107 = paddle._C_ops.stack(combine_109, 0) + del combine_109 + + # pd_op.reshape: (-1x24x24x512xf32) <- (-1x576x512xf32, 4xi64) + reshape_251 = paddle._C_ops.reshape(layer_norm_111, stack_107) + del layer_norm_111, stack_107 + + # pd_op.shape64: (4xi64) <- (-1x24x24x512xf32) + shape64_68 = paddle._C_ops.shape64(reshape_251) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_119 = paddle._C_ops.slice( + shape64_68, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_68 + + # pd_op.roll: (-1x24x24x512xf32) <- (-1x24x24x512xf32, 2xi64) + roll_16 = paddle._C_ops.roll(reshape_251, full_int_array_11, [1, 2]) + del reshape_251 + + # pd_op.shape64: (4xi64) <- (-1x24x24x512xf32) + shape64_69 = paddle._C_ops.shape64(roll_16) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_120 = paddle._C_ops.slice( + shape64_69, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_69 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_110 = [slice_120, full_29, full_3, full_29, full_3, full_18] + del slice_120 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_108 = paddle._C_ops.stack(combine_110, 0) + del combine_110 + + # pd_op.reshape: (-1x2x12x2x12x512xf32) <- (-1x24x24x512xf32, 6xi64) + reshape_252 = paddle._C_ops.reshape(roll_16, stack_108) + del roll_16, stack_108 + + # pd_op.transpose: (-1x2x2x12x12x512xf32) <- (-1x2x12x2x12x512xf32) + transpose_111 = paddle._C_ops.transpose(reshape_252, [0, 1, 3, 2, 4, 5]) + del reshape_252 + + # pd_op.reshape: (-1x12x12x512xf32) <- (-1x2x2x12x12x512xf32, 4xi64) + reshape_253 = paddle._C_ops.reshape(transpose_111, full_int_array_38) + del transpose_111 + + # pd_op.reshape: (-1x144x512xf32) <- (-1x12x12x512xf32, 3xi64) + reshape_254 = paddle._C_ops.reshape(reshape_253, full_int_array_39) + del reshape_253 + + # pd_op.full: (1x24x24x1xf32) <- () + full_40 = paddle._C_ops.full( + [1, 24, 24, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__76 = paddle._C_ops.set_value_( + full_40, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_40 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__77 = paddle._C_ops.set_value_( + set_value__76, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__76 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__78 = paddle._C_ops.set_value_( + set_value__77, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__77 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__79 = paddle._C_ops.set_value_( + set_value__78, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__78 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__80 = paddle._C_ops.set_value_( + set_value__79, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__79 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__81 = paddle._C_ops.set_value_( + set_value__80, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__80 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__82 = paddle._C_ops.set_value_( + set_value__81, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__81 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__83 = paddle._C_ops.set_value_( + set_value__82, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__82 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__8 = paddle._C_ops.set_value_( + set_value__83, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__83 + + # pd_op.reshape: (1x2x12x2x12x1xf32) <- (1x24x24x1xf32, 6xi64) + reshape_255 = paddle._C_ops.reshape(set_value__8, full_int_array_42) + + # pd_op.transpose: (1x2x2x12x12x1xf32) <- (1x2x12x2x12x1xf32) + transpose_112 = paddle._C_ops.transpose(reshape_255, [0, 1, 3, 2, 4, 5]) + del reshape_255 + + # pd_op.reshape: (4x12x12x1xf32) <- (1x2x2x12x12x1xf32, 4xi64) + reshape_256 = paddle._C_ops.reshape(transpose_112, full_int_array_27) + del transpose_112 + + # pd_op.reshape: (4x144xf32) <- (4x12x12x1xf32, 2xi64) + reshape_257 = paddle._C_ops.reshape(reshape_256, full_int_array_28) + del reshape_256 + + # pd_op.unsqueeze: (4x1x144xf32) <- (4x144xf32, 1xi64) + unsqueeze_49 = paddle._C_ops.unsqueeze(reshape_257, full_int_array_1) + + # pd_op.unsqueeze: (4x144x1xf32) <- (4x144xf32, 1xi64) + unsqueeze_50 = paddle._C_ops.unsqueeze(reshape_257, full_int_array_5) + del reshape_257 + + # pd_op.subtract: (4x144x144xf32) <- (4x1x144xf32, 4x144x1xf32) + subtract_8 = paddle._C_ops.subtract(unsqueeze_49, unsqueeze_50) + del unsqueeze_49, unsqueeze_50 + + # pd_op.not_equal: (4x144x144xb) <- (4x144x144xf32, xf32) + not_equal_8 = paddle._C_ops.not_equal(subtract_8, full_11) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_16 = paddle._C_ops.where(not_equal_8, full_32, subtract_8) + del not_equal_8, subtract_8 + + # pd_op.equal: (4x144x144xb) <- (4x144x144xf32, xf32) + equal_8 = paddle._C_ops.equal(where_16, full_11) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_17 = paddle._C_ops.where(equal_8, full_33, where_16) + del equal_8, where_16 + + # pd_op.shape64: (3xi64) <- (-1x144x512xf32) + shape64_70 = paddle._C_ops.shape64(reshape_254) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_121 = paddle._C_ops.slice( + shape64_70, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_70 + + # pd_op.matmul: (-1x144x1536xf32) <- (-1x144x512xf32, 512x1536xf32) + matmul_104 = paddle._C_ops.matmul(reshape_254, parameter_88, False, False) + del parameter_88, reshape_254 + + # pd_op.add: (-1x144x1536xf32) <- (-1x144x1536xf32, 1536xf32) + add_129 = paddle._C_ops.add(matmul_104, parameter_87) + del matmul_104, parameter_87 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_111 = [slice_121, full_4, full_5, full_26, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_109 = paddle._C_ops.stack(combine_111, 0) + del combine_111 + + # pd_op.reshape: (-1x144x3x16x32xf32) <- (-1x144x1536xf32, 5xi64) + reshape_258 = paddle._C_ops.reshape(add_129, stack_109) + del add_129, stack_109 + + # pd_op.transpose: (3x-1x16x144x32xf32) <- (-1x144x3x16x32xf32) + transpose_113 = paddle._C_ops.transpose(reshape_258, [2, 0, 3, 1, 4]) + del reshape_258 + + # pd_op.slice: (-1x16x144x32xf32) <- (3x-1x16x144x32xf32, 1xi64, 1xi64) + slice_122 = paddle._C_ops.slice( + transpose_113, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x16x144x32xf32) <- (3x-1x16x144x32xf32, 1xi64, 1xi64) + slice_123 = paddle._C_ops.slice( + transpose_113, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x16x144x32xf32) <- (3x-1x16x144x32xf32, 1xi64, 1xi64) + slice_124 = paddle._C_ops.slice( + transpose_113, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_113 + + # pd_op.scale: (-1x16x144x32xf32) <- (-1x16x144x32xf32, 1xf32) + scale_17 = paddle._C_ops.scale(slice_122, full_8, float("0"), True) + del slice_122 + + # pd_op.transpose: (-1x16x32x144xf32) <- (-1x16x144x32xf32) + transpose_114 = paddle._C_ops.transpose(slice_123, [0, 1, 3, 2]) + del slice_123 + + # pd_op.matmul: (-1x16x144x144xf32) <- (-1x16x144x32xf32, -1x16x32x144xf32) + matmul_105 = paddle._C_ops.matmul(scale_17, transpose_114, False, False) + del scale_17, transpose_114 + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_259 = paddle._C_ops.reshape(data_35, full_int_array_7) + del data_35 + + # pd_op.index_select: (20736x16xf32) <- (529x16xf32, 20736xi64) + index_select_17 = paddle._C_ops.index_select(data_36, reshape_259, 0) + del data_36, reshape_259 + + # pd_op.reshape: (144x144x16xf32) <- (20736x16xf32, 3xi64) + reshape_260 = paddle._C_ops.reshape(index_select_17, full_int_array_8) + del index_select_17 + + # pd_op.transpose: (16x144x144xf32) <- (144x144x16xf32) + transpose_115 = paddle._C_ops.transpose(reshape_260, [2, 0, 1]) + del reshape_260 + + # pd_op.unsqueeze: (1x16x144x144xf32) <- (16x144x144xf32, 1xi64) + unsqueeze_51 = paddle._C_ops.unsqueeze(transpose_115, full_int_array_0) + del transpose_115 + + # pd_op.add: (-1x16x144x144xf32) <- (-1x16x144x144xf32, 1x16x144x144xf32) + add_130 = paddle._C_ops.add(matmul_105, unsqueeze_51) + del matmul_105, unsqueeze_51 + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_8 = paddle._C_ops.floor_divide(slice_121, full_34) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_112 = [floor_divide_8, full_6, full_26, full_4, full_4] + del floor_divide_8 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_110 = paddle._C_ops.stack(combine_112, 0) + del combine_112 + + # pd_op.reshape: (-1x4x16x144x144xf32) <- (-1x16x144x144xf32, 5xi64) + reshape_261 = paddle._C_ops.reshape(add_130, stack_110) + del add_130, stack_110 + + # pd_op.unsqueeze: (4x1x144x144xf32) <- (4x144x144xf32, 1xi64) + unsqueeze_52 = paddle._C_ops.unsqueeze(where_17, full_int_array_1) + del where_17 + + # pd_op.unsqueeze: (1x4x1x144x144xf32) <- (4x1x144x144xf32, 1xi64) + unsqueeze_53 = paddle._C_ops.unsqueeze(unsqueeze_52, full_int_array_0) + del unsqueeze_52 + + # pd_op.add: (-1x4x16x144x144xf32) <- (-1x4x16x144x144xf32, 1x4x1x144x144xf32) + add_131 = paddle._C_ops.add(reshape_261, unsqueeze_53) + del reshape_261, unsqueeze_53 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_113 = [slice_121, full_26, full_4, full_4] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_111 = paddle._C_ops.stack(combine_113, 0) + del combine_113 + + # pd_op.reshape: (-1x16x144x144xf32) <- (-1x4x16x144x144xf32, 4xi64) + reshape_262 = paddle._C_ops.reshape(add_131, stack_111) + del add_131, stack_111 + + # pd_op.softmax: (-1x16x144x144xf32) <- (-1x16x144x144xf32) + softmax_17 = paddle._C_ops.softmax(reshape_262, -1) + del reshape_262 + + # pd_op.matmul: (-1x16x144x32xf32) <- (-1x16x144x144xf32, -1x16x144x32xf32) + matmul_106 = paddle._C_ops.matmul(softmax_17, slice_124, False, False) + del slice_124, softmax_17 + + # pd_op.transpose: (-1x144x16x32xf32) <- (-1x16x144x32xf32) + transpose_116 = paddle._C_ops.transpose(matmul_106, [0, 2, 1, 3]) + del matmul_106 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_114 = [slice_121, full_4, full_18] + del slice_121 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_112 = paddle._C_ops.stack(combine_114, 0) + del combine_114 + + # pd_op.reshape: (-1x144x512xf32) <- (-1x144x16x32xf32, 3xi64) + reshape_263 = paddle._C_ops.reshape(transpose_116, stack_112) + del stack_112, transpose_116 + + # pd_op.matmul: (-1x144x512xf32) <- (-1x144x512xf32, 512x512xf32) + matmul_107 = paddle._C_ops.matmul(reshape_263, parameter_86, False, False) + del parameter_86, reshape_263 + + # pd_op.add: (-1x144x512xf32) <- (-1x144x512xf32, 512xf32) + add_132 = paddle._C_ops.add(matmul_107, parameter_85) + del matmul_107, parameter_85 + + # pd_op.reshape: (-1x12x12x512xf32) <- (-1x144x512xf32, 4xi64) + reshape_264 = paddle._C_ops.reshape(add_132, full_int_array_38) + del add_132 + + # pd_op.reshape: (-1x2x2x12x12x512xf32) <- (-1x12x12x512xf32, 6xi64) + reshape_265 = paddle._C_ops.reshape(reshape_264, full_int_array_40) + del reshape_264 + + # pd_op.transpose: (-1x2x12x2x12x512xf32) <- (-1x2x2x12x12x512xf32) + transpose_117 = paddle._C_ops.transpose(reshape_265, [0, 1, 3, 2, 4, 5]) + del reshape_265 + + # pd_op.reshape: (-1x24x24x512xf32) <- (-1x2x12x2x12x512xf32, 4xi64) + reshape_266 = paddle._C_ops.reshape(transpose_117, full_int_array_41) + del transpose_117 + + # pd_op.roll: (-1x24x24x512xf32) <- (-1x24x24x512xf32, 2xi64) + roll_17 = paddle._C_ops.roll(reshape_266, full_int_array_29, [1, 2]) + del reshape_266 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_115 = [slice_118, full_30, full_18] + del slice_118 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_113 = paddle._C_ops.stack(combine_115, 0) + del combine_115 + + # pd_op.reshape: (-1x576x512xf32) <- (-1x24x24x512xf32, 3xi64) + reshape_267 = paddle._C_ops.reshape(roll_17, stack_113) + del roll_17, stack_113 + + # pd_op.add: (-1x576x512xf32) <- (-1x576x512xf32, -1x576x512xf32) + add_133 = paddle._C_ops.add(add_128, reshape_267) + del add_128, reshape_267 + + # pd_op.layer_norm: (-1x576x512xf32, -1x576xf32, -1x576xf32) <- (-1x576x512xf32, 512xf32, 512xf32) + layer_norm_114, layer_norm_115, layer_norm_116 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_133, parameter_84, parameter_83, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_83, parameter_84 + + # pd_op.matmul: (-1x576x2048xf32) <- (-1x576x512xf32, 512x2048xf32) + matmul_108 = paddle._C_ops.matmul(layer_norm_114, parameter_82, False, False) + del layer_norm_114, parameter_82 + + # pd_op.add: (-1x576x2048xf32) <- (-1x576x2048xf32, 2048xf32) + add_134 = paddle._C_ops.add(matmul_108, parameter_81) + del matmul_108, parameter_81 + + # pd_op.gelu: (-1x576x2048xf32) <- (-1x576x2048xf32) + gelu_17 = paddle._C_ops.gelu(add_134, False) + del add_134 + + # pd_op.matmul: (-1x576x512xf32) <- (-1x576x2048xf32, 2048x512xf32) + matmul_109 = paddle._C_ops.matmul(gelu_17, parameter_80, False, False) + del gelu_17, parameter_80 + + # pd_op.add: (-1x576x512xf32) <- (-1x576x512xf32, 512xf32) + add_135 = paddle._C_ops.add(matmul_109, parameter_79) + del matmul_109, parameter_79 + + # pd_op.add: (-1x576x512xf32) <- (-1x576x512xf32, -1x576x512xf32) + add_136 = paddle._C_ops.add(add_133, add_135) + del add_133, add_135 + + # pd_op.shape64: (3xi64) <- (-1x576x512xf32) + shape64_71 = paddle._C_ops.shape64(add_136) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_125 = paddle._C_ops.slice( + shape64_71, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_71 + + # pd_op.layer_norm: (-1x576x512xf32, -1x576xf32, -1x576xf32) <- (-1x576x512xf32, 512xf32, 512xf32) + layer_norm_117, layer_norm_118, layer_norm_119 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_136, parameter_78, parameter_77, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_77, parameter_78 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_116 = [slice_125, full_28, full_28, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_114 = paddle._C_ops.stack(combine_116, 0) + del combine_116 + + # pd_op.reshape: (-1x24x24x512xf32) <- (-1x576x512xf32, 4xi64) + reshape_268 = paddle._C_ops.reshape(layer_norm_117, stack_114) + del layer_norm_117, stack_114 + + # pd_op.shape64: (4xi64) <- (-1x24x24x512xf32) + shape64_72 = paddle._C_ops.shape64(reshape_268) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_126 = paddle._C_ops.slice( + shape64_72, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_72 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_117 = [slice_126, full_29, full_3, full_29, full_3, full_18] + del slice_126 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_115 = paddle._C_ops.stack(combine_117, 0) + del combine_117 + + # pd_op.reshape: (-1x2x12x2x12x512xf32) <- (-1x24x24x512xf32, 6xi64) + reshape_269 = paddle._C_ops.reshape(reshape_268, stack_115) + del reshape_268, stack_115 + + # pd_op.transpose: (-1x2x2x12x12x512xf32) <- (-1x2x12x2x12x512xf32) + transpose_118 = paddle._C_ops.transpose(reshape_269, [0, 1, 3, 2, 4, 5]) + del reshape_269 + + # pd_op.reshape: (-1x12x12x512xf32) <- (-1x2x2x12x12x512xf32, 4xi64) + reshape_270 = paddle._C_ops.reshape(transpose_118, full_int_array_38) + del transpose_118 + + # pd_op.reshape: (-1x144x512xf32) <- (-1x12x12x512xf32, 3xi64) + reshape_271 = paddle._C_ops.reshape(reshape_270, full_int_array_39) + del reshape_270 + + # pd_op.shape64: (3xi64) <- (-1x144x512xf32) + shape64_73 = paddle._C_ops.shape64(reshape_271) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_127 = paddle._C_ops.slice( + shape64_73, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_73 + + # pd_op.matmul: (-1x144x1536xf32) <- (-1x144x512xf32, 512x1536xf32) + matmul_110 = paddle._C_ops.matmul(reshape_271, parameter_76, False, False) + del parameter_76, reshape_271 + + # pd_op.add: (-1x144x1536xf32) <- (-1x144x1536xf32, 1536xf32) + add_137 = paddle._C_ops.add(matmul_110, parameter_75) + del matmul_110, parameter_75 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_118 = [slice_127, full_4, full_5, full_26, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_116 = paddle._C_ops.stack(combine_118, 0) + del combine_118 + + # pd_op.reshape: (-1x144x3x16x32xf32) <- (-1x144x1536xf32, 5xi64) + reshape_272 = paddle._C_ops.reshape(add_137, stack_116) + del add_137, stack_116 + + # pd_op.transpose: (3x-1x16x144x32xf32) <- (-1x144x3x16x32xf32) + transpose_119 = paddle._C_ops.transpose(reshape_272, [2, 0, 3, 1, 4]) + del reshape_272 + + # pd_op.slice: (-1x16x144x32xf32) <- (3x-1x16x144x32xf32, 1xi64, 1xi64) + slice_128 = paddle._C_ops.slice( + transpose_119, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x16x144x32xf32) <- (3x-1x16x144x32xf32, 1xi64, 1xi64) + slice_129 = paddle._C_ops.slice( + transpose_119, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x16x144x32xf32) <- (3x-1x16x144x32xf32, 1xi64, 1xi64) + slice_130 = paddle._C_ops.slice( + transpose_119, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_119 + + # pd_op.scale: (-1x16x144x32xf32) <- (-1x16x144x32xf32, 1xf32) + scale_18 = paddle._C_ops.scale(slice_128, full_8, float("0"), True) + del slice_128 + + # pd_op.transpose: (-1x16x32x144xf32) <- (-1x16x144x32xf32) + transpose_120 = paddle._C_ops.transpose(slice_129, [0, 1, 3, 2]) + del slice_129 + + # pd_op.matmul: (-1x16x144x144xf32) <- (-1x16x144x32xf32, -1x16x32x144xf32) + matmul_111 = paddle._C_ops.matmul(scale_18, transpose_120, False, False) + del scale_18, transpose_120 + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_273 = paddle._C_ops.reshape(data_37, full_int_array_7) + del data_37 + + # pd_op.index_select: (20736x16xf32) <- (529x16xf32, 20736xi64) + index_select_18 = paddle._C_ops.index_select(data_38, reshape_273, 0) + del data_38, reshape_273 + + # pd_op.reshape: (144x144x16xf32) <- (20736x16xf32, 3xi64) + reshape_274 = paddle._C_ops.reshape(index_select_18, full_int_array_8) + del index_select_18 + + # pd_op.transpose: (16x144x144xf32) <- (144x144x16xf32) + transpose_121 = paddle._C_ops.transpose(reshape_274, [2, 0, 1]) + del reshape_274 + + # pd_op.unsqueeze: (1x16x144x144xf32) <- (16x144x144xf32, 1xi64) + unsqueeze_54 = paddle._C_ops.unsqueeze(transpose_121, full_int_array_0) + del transpose_121 + + # pd_op.add: (-1x16x144x144xf32) <- (-1x16x144x144xf32, 1x16x144x144xf32) + add_138 = paddle._C_ops.add(matmul_111, unsqueeze_54) + del matmul_111, unsqueeze_54 + + # pd_op.softmax: (-1x16x144x144xf32) <- (-1x16x144x144xf32) + softmax_18 = paddle._C_ops.softmax(add_138, -1) + del add_138 + + # pd_op.matmul: (-1x16x144x32xf32) <- (-1x16x144x144xf32, -1x16x144x32xf32) + matmul_112 = paddle._C_ops.matmul(softmax_18, slice_130, False, False) + del slice_130, softmax_18 + + # pd_op.transpose: (-1x144x16x32xf32) <- (-1x16x144x32xf32) + transpose_122 = paddle._C_ops.transpose(matmul_112, [0, 2, 1, 3]) + del matmul_112 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_119 = [slice_127, full_4, full_18] + del slice_127 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_117 = paddle._C_ops.stack(combine_119, 0) + del combine_119 + + # pd_op.reshape: (-1x144x512xf32) <- (-1x144x16x32xf32, 3xi64) + reshape_275 = paddle._C_ops.reshape(transpose_122, stack_117) + del stack_117, transpose_122 + + # pd_op.matmul: (-1x144x512xf32) <- (-1x144x512xf32, 512x512xf32) + matmul_113 = paddle._C_ops.matmul(reshape_275, parameter_74, False, False) + del parameter_74, reshape_275 + + # pd_op.add: (-1x144x512xf32) <- (-1x144x512xf32, 512xf32) + add_139 = paddle._C_ops.add(matmul_113, parameter_73) + del matmul_113, parameter_73 + + # pd_op.reshape: (-1x12x12x512xf32) <- (-1x144x512xf32, 4xi64) + reshape_276 = paddle._C_ops.reshape(add_139, full_int_array_38) + del add_139 + + # pd_op.reshape: (-1x2x2x12x12x512xf32) <- (-1x12x12x512xf32, 6xi64) + reshape_277 = paddle._C_ops.reshape(reshape_276, full_int_array_40) + del reshape_276 + + # pd_op.transpose: (-1x2x12x2x12x512xf32) <- (-1x2x2x12x12x512xf32) + transpose_123 = paddle._C_ops.transpose(reshape_277, [0, 1, 3, 2, 4, 5]) + del reshape_277 + + # pd_op.reshape: (-1x24x24x512xf32) <- (-1x2x12x2x12x512xf32, 4xi64) + reshape_278 = paddle._C_ops.reshape(transpose_123, full_int_array_41) + del transpose_123 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_120 = [slice_125, full_30, full_18] + del slice_125 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_118 = paddle._C_ops.stack(combine_120, 0) + del combine_120 + + # pd_op.reshape: (-1x576x512xf32) <- (-1x24x24x512xf32, 3xi64) + reshape_279 = paddle._C_ops.reshape(reshape_278, stack_118) + del reshape_278, stack_118 + + # pd_op.add: (-1x576x512xf32) <- (-1x576x512xf32, -1x576x512xf32) + add_140 = paddle._C_ops.add(add_136, reshape_279) + del add_136, reshape_279 + + # pd_op.layer_norm: (-1x576x512xf32, -1x576xf32, -1x576xf32) <- (-1x576x512xf32, 512xf32, 512xf32) + layer_norm_120, layer_norm_121, layer_norm_122 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_140, parameter_72, parameter_71, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_71, parameter_72 + + # pd_op.matmul: (-1x576x2048xf32) <- (-1x576x512xf32, 512x2048xf32) + matmul_114 = paddle._C_ops.matmul(layer_norm_120, parameter_70, False, False) + del layer_norm_120, parameter_70 + + # pd_op.add: (-1x576x2048xf32) <- (-1x576x2048xf32, 2048xf32) + add_141 = paddle._C_ops.add(matmul_114, parameter_69) + del matmul_114, parameter_69 + + # pd_op.gelu: (-1x576x2048xf32) <- (-1x576x2048xf32) + gelu_18 = paddle._C_ops.gelu(add_141, False) + del add_141 + + # pd_op.matmul: (-1x576x512xf32) <- (-1x576x2048xf32, 2048x512xf32) + matmul_115 = paddle._C_ops.matmul(gelu_18, parameter_68, False, False) + del gelu_18, parameter_68 + + # pd_op.add: (-1x576x512xf32) <- (-1x576x512xf32, 512xf32) + add_142 = paddle._C_ops.add(matmul_115, parameter_67) + del matmul_115, parameter_67 + + # pd_op.add: (-1x576x512xf32) <- (-1x576x512xf32, -1x576x512xf32) + add_143 = paddle._C_ops.add(add_140, add_142) + del add_140, add_142 + + # pd_op.shape64: (3xi64) <- (-1x576x512xf32) + shape64_74 = paddle._C_ops.shape64(add_143) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_131 = paddle._C_ops.slice( + shape64_74, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_74 + + # pd_op.layer_norm: (-1x576x512xf32, -1x576xf32, -1x576xf32) <- (-1x576x512xf32, 512xf32, 512xf32) + layer_norm_123, layer_norm_124, layer_norm_125 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_143, parameter_66, parameter_65, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_65, parameter_66 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_121 = [slice_131, full_28, full_28, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_119 = paddle._C_ops.stack(combine_121, 0) + del combine_121 + + # pd_op.reshape: (-1x24x24x512xf32) <- (-1x576x512xf32, 4xi64) + reshape_280 = paddle._C_ops.reshape(layer_norm_123, stack_119) + del layer_norm_123, stack_119 + + # pd_op.shape64: (4xi64) <- (-1x24x24x512xf32) + shape64_75 = paddle._C_ops.shape64(reshape_280) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_132 = paddle._C_ops.slice( + shape64_75, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_75 + + # pd_op.roll: (-1x24x24x512xf32) <- (-1x24x24x512xf32, 2xi64) + roll_18 = paddle._C_ops.roll(reshape_280, full_int_array_11, [1, 2]) + del reshape_280 + + # pd_op.shape64: (4xi64) <- (-1x24x24x512xf32) + shape64_76 = paddle._C_ops.shape64(roll_18) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_133 = paddle._C_ops.slice( + shape64_76, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_76 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_122 = [slice_133, full_29, full_3, full_29, full_3, full_18] + del slice_133 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_120 = paddle._C_ops.stack(combine_122, 0) + del combine_122 + + # pd_op.reshape: (-1x2x12x2x12x512xf32) <- (-1x24x24x512xf32, 6xi64) + reshape_281 = paddle._C_ops.reshape(roll_18, stack_120) + del roll_18, stack_120 + + # pd_op.transpose: (-1x2x2x12x12x512xf32) <- (-1x2x12x2x12x512xf32) + transpose_124 = paddle._C_ops.transpose(reshape_281, [0, 1, 3, 2, 4, 5]) + del reshape_281 + + # pd_op.reshape: (-1x12x12x512xf32) <- (-1x2x2x12x12x512xf32, 4xi64) + reshape_282 = paddle._C_ops.reshape(transpose_124, full_int_array_38) + del transpose_124 + + # pd_op.reshape: (-1x144x512xf32) <- (-1x12x12x512xf32, 3xi64) + reshape_283 = paddle._C_ops.reshape(reshape_282, full_int_array_39) + del reshape_282 + + # pd_op.full: (1x24x24x1xf32) <- () + full_41 = paddle._C_ops.full( + [1, 24, 24, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__84 = paddle._C_ops.set_value_( + full_41, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_41 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__85 = paddle._C_ops.set_value_( + set_value__84, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__84 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__86 = paddle._C_ops.set_value_( + set_value__85, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__85 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__87 = paddle._C_ops.set_value_( + set_value__86, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__86 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__88 = paddle._C_ops.set_value_( + set_value__87, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__87 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__89 = paddle._C_ops.set_value_( + set_value__88, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__88 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__90 = paddle._C_ops.set_value_( + set_value__89, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__89 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__91 = paddle._C_ops.set_value_( + set_value__90, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__90 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__9 = paddle._C_ops.set_value_( + set_value__91, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__91 + + # pd_op.reshape: (1x2x12x2x12x1xf32) <- (1x24x24x1xf32, 6xi64) + reshape_284 = paddle._C_ops.reshape(set_value__9, full_int_array_42) + + # pd_op.transpose: (1x2x2x12x12x1xf32) <- (1x2x12x2x12x1xf32) + transpose_125 = paddle._C_ops.transpose(reshape_284, [0, 1, 3, 2, 4, 5]) + del reshape_284 + + # pd_op.reshape: (4x12x12x1xf32) <- (1x2x2x12x12x1xf32, 4xi64) + reshape_285 = paddle._C_ops.reshape(transpose_125, full_int_array_27) + del transpose_125 + + # pd_op.reshape: (4x144xf32) <- (4x12x12x1xf32, 2xi64) + reshape_286 = paddle._C_ops.reshape(reshape_285, full_int_array_28) + del reshape_285 + + # pd_op.unsqueeze: (4x1x144xf32) <- (4x144xf32, 1xi64) + unsqueeze_55 = paddle._C_ops.unsqueeze(reshape_286, full_int_array_1) + + # pd_op.unsqueeze: (4x144x1xf32) <- (4x144xf32, 1xi64) + unsqueeze_56 = paddle._C_ops.unsqueeze(reshape_286, full_int_array_5) + del reshape_286 + + # pd_op.subtract: (4x144x144xf32) <- (4x1x144xf32, 4x144x1xf32) + subtract_9 = paddle._C_ops.subtract(unsqueeze_55, unsqueeze_56) + del unsqueeze_55, unsqueeze_56 + + # pd_op.not_equal: (4x144x144xb) <- (4x144x144xf32, xf32) + not_equal_9 = paddle._C_ops.not_equal(subtract_9, full_11) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_18 = paddle._C_ops.where(not_equal_9, full_32, subtract_9) + del not_equal_9, subtract_9 + + # pd_op.equal: (4x144x144xb) <- (4x144x144xf32, xf32) + equal_9 = paddle._C_ops.equal(where_18, full_11) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_19 = paddle._C_ops.where(equal_9, full_33, where_18) + del equal_9, where_18 + + # pd_op.shape64: (3xi64) <- (-1x144x512xf32) + shape64_77 = paddle._C_ops.shape64(reshape_283) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_134 = paddle._C_ops.slice( + shape64_77, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_77 + + # pd_op.matmul: (-1x144x1536xf32) <- (-1x144x512xf32, 512x1536xf32) + matmul_116 = paddle._C_ops.matmul(reshape_283, parameter_64, False, False) + del parameter_64, reshape_283 + + # pd_op.add: (-1x144x1536xf32) <- (-1x144x1536xf32, 1536xf32) + add_144 = paddle._C_ops.add(matmul_116, parameter_63) + del matmul_116, parameter_63 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_123 = [slice_134, full_4, full_5, full_26, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_121 = paddle._C_ops.stack(combine_123, 0) + del combine_123 + + # pd_op.reshape: (-1x144x3x16x32xf32) <- (-1x144x1536xf32, 5xi64) + reshape_287 = paddle._C_ops.reshape(add_144, stack_121) + del add_144, stack_121 + + # pd_op.transpose: (3x-1x16x144x32xf32) <- (-1x144x3x16x32xf32) + transpose_126 = paddle._C_ops.transpose(reshape_287, [2, 0, 3, 1, 4]) + del reshape_287 + + # pd_op.slice: (-1x16x144x32xf32) <- (3x-1x16x144x32xf32, 1xi64, 1xi64) + slice_135 = paddle._C_ops.slice( + transpose_126, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x16x144x32xf32) <- (3x-1x16x144x32xf32, 1xi64, 1xi64) + slice_136 = paddle._C_ops.slice( + transpose_126, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x16x144x32xf32) <- (3x-1x16x144x32xf32, 1xi64, 1xi64) + slice_137 = paddle._C_ops.slice( + transpose_126, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_126 + + # pd_op.scale: (-1x16x144x32xf32) <- (-1x16x144x32xf32, 1xf32) + scale_19 = paddle._C_ops.scale(slice_135, full_8, float("0"), True) + del slice_135 + + # pd_op.transpose: (-1x16x32x144xf32) <- (-1x16x144x32xf32) + transpose_127 = paddle._C_ops.transpose(slice_136, [0, 1, 3, 2]) + del slice_136 + + # pd_op.matmul: (-1x16x144x144xf32) <- (-1x16x144x32xf32, -1x16x32x144xf32) + matmul_117 = paddle._C_ops.matmul(scale_19, transpose_127, False, False) + del scale_19, transpose_127 + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_288 = paddle._C_ops.reshape(data_39, full_int_array_7) + del data_39 + + # pd_op.index_select: (20736x16xf32) <- (529x16xf32, 20736xi64) + index_select_19 = paddle._C_ops.index_select(data_40, reshape_288, 0) + del data_40, reshape_288 + + # pd_op.reshape: (144x144x16xf32) <- (20736x16xf32, 3xi64) + reshape_289 = paddle._C_ops.reshape(index_select_19, full_int_array_8) + del index_select_19 + + # pd_op.transpose: (16x144x144xf32) <- (144x144x16xf32) + transpose_128 = paddle._C_ops.transpose(reshape_289, [2, 0, 1]) + del reshape_289 + + # pd_op.unsqueeze: (1x16x144x144xf32) <- (16x144x144xf32, 1xi64) + unsqueeze_57 = paddle._C_ops.unsqueeze(transpose_128, full_int_array_0) + del transpose_128 + + # pd_op.add: (-1x16x144x144xf32) <- (-1x16x144x144xf32, 1x16x144x144xf32) + add_145 = paddle._C_ops.add(matmul_117, unsqueeze_57) + del matmul_117, unsqueeze_57 + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_9 = paddle._C_ops.floor_divide(slice_134, full_34) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_124 = [floor_divide_9, full_6, full_26, full_4, full_4] + del floor_divide_9 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_122 = paddle._C_ops.stack(combine_124, 0) + del combine_124 + + # pd_op.reshape: (-1x4x16x144x144xf32) <- (-1x16x144x144xf32, 5xi64) + reshape_290 = paddle._C_ops.reshape(add_145, stack_122) + del add_145, stack_122 + + # pd_op.unsqueeze: (4x1x144x144xf32) <- (4x144x144xf32, 1xi64) + unsqueeze_58 = paddle._C_ops.unsqueeze(where_19, full_int_array_1) + del where_19 + + # pd_op.unsqueeze: (1x4x1x144x144xf32) <- (4x1x144x144xf32, 1xi64) + unsqueeze_59 = paddle._C_ops.unsqueeze(unsqueeze_58, full_int_array_0) + del unsqueeze_58 + + # pd_op.add: (-1x4x16x144x144xf32) <- (-1x4x16x144x144xf32, 1x4x1x144x144xf32) + add_146 = paddle._C_ops.add(reshape_290, unsqueeze_59) + del reshape_290, unsqueeze_59 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_125 = [slice_134, full_26, full_4, full_4] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_123 = paddle._C_ops.stack(combine_125, 0) + del combine_125 + + # pd_op.reshape: (-1x16x144x144xf32) <- (-1x4x16x144x144xf32, 4xi64) + reshape_291 = paddle._C_ops.reshape(add_146, stack_123) + del add_146, stack_123 + + # pd_op.softmax: (-1x16x144x144xf32) <- (-1x16x144x144xf32) + softmax_19 = paddle._C_ops.softmax(reshape_291, -1) + del reshape_291 + + # pd_op.matmul: (-1x16x144x32xf32) <- (-1x16x144x144xf32, -1x16x144x32xf32) + matmul_118 = paddle._C_ops.matmul(softmax_19, slice_137, False, False) + del slice_137, softmax_19 + + # pd_op.transpose: (-1x144x16x32xf32) <- (-1x16x144x32xf32) + transpose_129 = paddle._C_ops.transpose(matmul_118, [0, 2, 1, 3]) + del matmul_118 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_126 = [slice_134, full_4, full_18] + del slice_134 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_124 = paddle._C_ops.stack(combine_126, 0) + del combine_126 + + # pd_op.reshape: (-1x144x512xf32) <- (-1x144x16x32xf32, 3xi64) + reshape_292 = paddle._C_ops.reshape(transpose_129, stack_124) + del stack_124, transpose_129 + + # pd_op.matmul: (-1x144x512xf32) <- (-1x144x512xf32, 512x512xf32) + matmul_119 = paddle._C_ops.matmul(reshape_292, parameter_62, False, False) + del parameter_62, reshape_292 + + # pd_op.add: (-1x144x512xf32) <- (-1x144x512xf32, 512xf32) + add_147 = paddle._C_ops.add(matmul_119, parameter_61) + del matmul_119, parameter_61 + + # pd_op.reshape: (-1x12x12x512xf32) <- (-1x144x512xf32, 4xi64) + reshape_293 = paddle._C_ops.reshape(add_147, full_int_array_38) + del add_147 + + # pd_op.reshape: (-1x2x2x12x12x512xf32) <- (-1x12x12x512xf32, 6xi64) + reshape_294 = paddle._C_ops.reshape(reshape_293, full_int_array_40) + del reshape_293 + + # pd_op.transpose: (-1x2x12x2x12x512xf32) <- (-1x2x2x12x12x512xf32) + transpose_130 = paddle._C_ops.transpose(reshape_294, [0, 1, 3, 2, 4, 5]) + del reshape_294 + + # pd_op.reshape: (-1x24x24x512xf32) <- (-1x2x12x2x12x512xf32, 4xi64) + reshape_295 = paddle._C_ops.reshape(transpose_130, full_int_array_41) + del transpose_130 + + # pd_op.roll: (-1x24x24x512xf32) <- (-1x24x24x512xf32, 2xi64) + roll_19 = paddle._C_ops.roll(reshape_295, full_int_array_29, [1, 2]) + del reshape_295 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_127 = [slice_131, full_30, full_18] + del slice_131 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_125 = paddle._C_ops.stack(combine_127, 0) + del combine_127 + + # pd_op.reshape: (-1x576x512xf32) <- (-1x24x24x512xf32, 3xi64) + reshape_296 = paddle._C_ops.reshape(roll_19, stack_125) + del roll_19, stack_125 + + # pd_op.add: (-1x576x512xf32) <- (-1x576x512xf32, -1x576x512xf32) + add_148 = paddle._C_ops.add(add_143, reshape_296) + del add_143, reshape_296 + + # pd_op.layer_norm: (-1x576x512xf32, -1x576xf32, -1x576xf32) <- (-1x576x512xf32, 512xf32, 512xf32) + layer_norm_126, layer_norm_127, layer_norm_128 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_148, parameter_60, parameter_59, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_59, parameter_60 + + # pd_op.matmul: (-1x576x2048xf32) <- (-1x576x512xf32, 512x2048xf32) + matmul_120 = paddle._C_ops.matmul(layer_norm_126, parameter_58, False, False) + del layer_norm_126, parameter_58 + + # pd_op.add: (-1x576x2048xf32) <- (-1x576x2048xf32, 2048xf32) + add_149 = paddle._C_ops.add(matmul_120, parameter_57) + del matmul_120, parameter_57 + + # pd_op.gelu: (-1x576x2048xf32) <- (-1x576x2048xf32) + gelu_19 = paddle._C_ops.gelu(add_149, False) + del add_149 + + # pd_op.matmul: (-1x576x512xf32) <- (-1x576x2048xf32, 2048x512xf32) + matmul_121 = paddle._C_ops.matmul(gelu_19, parameter_56, False, False) + del gelu_19, parameter_56 + + # pd_op.add: (-1x576x512xf32) <- (-1x576x512xf32, 512xf32) + add_150 = paddle._C_ops.add(matmul_121, parameter_55) + del matmul_121, parameter_55 + + # pd_op.add: (-1x576x512xf32) <- (-1x576x512xf32, -1x576x512xf32) + add_151 = paddle._C_ops.add(add_148, add_150) + del add_148, add_150 + + # pd_op.shape64: (3xi64) <- (-1x576x512xf32) + shape64_78 = paddle._C_ops.shape64(add_151) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_138 = paddle._C_ops.slice( + shape64_78, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_78 + + # pd_op.layer_norm: (-1x576x512xf32, -1x576xf32, -1x576xf32) <- (-1x576x512xf32, 512xf32, 512xf32) + layer_norm_129, layer_norm_130, layer_norm_131 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_151, parameter_54, parameter_53, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_53, parameter_54 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_128 = [slice_138, full_28, full_28, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_126 = paddle._C_ops.stack(combine_128, 0) + del combine_128 + + # pd_op.reshape: (-1x24x24x512xf32) <- (-1x576x512xf32, 4xi64) + reshape_297 = paddle._C_ops.reshape(layer_norm_129, stack_126) + del layer_norm_129, stack_126 + + # pd_op.shape64: (4xi64) <- (-1x24x24x512xf32) + shape64_79 = paddle._C_ops.shape64(reshape_297) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_139 = paddle._C_ops.slice( + shape64_79, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_79 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_129 = [slice_139, full_29, full_3, full_29, full_3, full_18] + del slice_139 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_127 = paddle._C_ops.stack(combine_129, 0) + del combine_129 + + # pd_op.reshape: (-1x2x12x2x12x512xf32) <- (-1x24x24x512xf32, 6xi64) + reshape_298 = paddle._C_ops.reshape(reshape_297, stack_127) + del reshape_297, stack_127 + + # pd_op.transpose: (-1x2x2x12x12x512xf32) <- (-1x2x12x2x12x512xf32) + transpose_131 = paddle._C_ops.transpose(reshape_298, [0, 1, 3, 2, 4, 5]) + del reshape_298 + + # pd_op.reshape: (-1x12x12x512xf32) <- (-1x2x2x12x12x512xf32, 4xi64) + reshape_299 = paddle._C_ops.reshape(transpose_131, full_int_array_38) + del transpose_131 + + # pd_op.reshape: (-1x144x512xf32) <- (-1x12x12x512xf32, 3xi64) + reshape_300 = paddle._C_ops.reshape(reshape_299, full_int_array_39) + del reshape_299 + + # pd_op.shape64: (3xi64) <- (-1x144x512xf32) + shape64_80 = paddle._C_ops.shape64(reshape_300) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_140 = paddle._C_ops.slice( + shape64_80, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_80 + + # pd_op.matmul: (-1x144x1536xf32) <- (-1x144x512xf32, 512x1536xf32) + matmul_122 = paddle._C_ops.matmul(reshape_300, parameter_52, False, False) + del parameter_52, reshape_300 + + # pd_op.add: (-1x144x1536xf32) <- (-1x144x1536xf32, 1536xf32) + add_152 = paddle._C_ops.add(matmul_122, parameter_51) + del matmul_122, parameter_51 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_130 = [slice_140, full_4, full_5, full_26, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_128 = paddle._C_ops.stack(combine_130, 0) + del combine_130 + + # pd_op.reshape: (-1x144x3x16x32xf32) <- (-1x144x1536xf32, 5xi64) + reshape_301 = paddle._C_ops.reshape(add_152, stack_128) + del add_152, stack_128 + + # pd_op.transpose: (3x-1x16x144x32xf32) <- (-1x144x3x16x32xf32) + transpose_132 = paddle._C_ops.transpose(reshape_301, [2, 0, 3, 1, 4]) + del reshape_301 + + # pd_op.slice: (-1x16x144x32xf32) <- (3x-1x16x144x32xf32, 1xi64, 1xi64) + slice_141 = paddle._C_ops.slice( + transpose_132, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x16x144x32xf32) <- (3x-1x16x144x32xf32, 1xi64, 1xi64) + slice_142 = paddle._C_ops.slice( + transpose_132, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x16x144x32xf32) <- (3x-1x16x144x32xf32, 1xi64, 1xi64) + slice_143 = paddle._C_ops.slice( + transpose_132, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_132 + + # pd_op.scale: (-1x16x144x32xf32) <- (-1x16x144x32xf32, 1xf32) + scale_20 = paddle._C_ops.scale(slice_141, full_8, float("0"), True) + del slice_141 + + # pd_op.transpose: (-1x16x32x144xf32) <- (-1x16x144x32xf32) + transpose_133 = paddle._C_ops.transpose(slice_142, [0, 1, 3, 2]) + del slice_142 + + # pd_op.matmul: (-1x16x144x144xf32) <- (-1x16x144x32xf32, -1x16x32x144xf32) + matmul_123 = paddle._C_ops.matmul(scale_20, transpose_133, False, False) + del scale_20, transpose_133 + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_302 = paddle._C_ops.reshape(data_41, full_int_array_7) + del data_41 + + # pd_op.index_select: (20736x16xf32) <- (529x16xf32, 20736xi64) + index_select_20 = paddle._C_ops.index_select(data_42, reshape_302, 0) + del data_42, reshape_302 + + # pd_op.reshape: (144x144x16xf32) <- (20736x16xf32, 3xi64) + reshape_303 = paddle._C_ops.reshape(index_select_20, full_int_array_8) + del index_select_20 + + # pd_op.transpose: (16x144x144xf32) <- (144x144x16xf32) + transpose_134 = paddle._C_ops.transpose(reshape_303, [2, 0, 1]) + del reshape_303 + + # pd_op.unsqueeze: (1x16x144x144xf32) <- (16x144x144xf32, 1xi64) + unsqueeze_60 = paddle._C_ops.unsqueeze(transpose_134, full_int_array_0) + del transpose_134 + + # pd_op.add: (-1x16x144x144xf32) <- (-1x16x144x144xf32, 1x16x144x144xf32) + add_153 = paddle._C_ops.add(matmul_123, unsqueeze_60) + del matmul_123, unsqueeze_60 + + # pd_op.softmax: (-1x16x144x144xf32) <- (-1x16x144x144xf32) + softmax_20 = paddle._C_ops.softmax(add_153, -1) + del add_153 + + # pd_op.matmul: (-1x16x144x32xf32) <- (-1x16x144x144xf32, -1x16x144x32xf32) + matmul_124 = paddle._C_ops.matmul(softmax_20, slice_143, False, False) + del slice_143, softmax_20 + + # pd_op.transpose: (-1x144x16x32xf32) <- (-1x16x144x32xf32) + transpose_135 = paddle._C_ops.transpose(matmul_124, [0, 2, 1, 3]) + del matmul_124 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_131 = [slice_140, full_4, full_18] + del slice_140 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_129 = paddle._C_ops.stack(combine_131, 0) + del combine_131 + + # pd_op.reshape: (-1x144x512xf32) <- (-1x144x16x32xf32, 3xi64) + reshape_304 = paddle._C_ops.reshape(transpose_135, stack_129) + del stack_129, transpose_135 + + # pd_op.matmul: (-1x144x512xf32) <- (-1x144x512xf32, 512x512xf32) + matmul_125 = paddle._C_ops.matmul(reshape_304, parameter_50, False, False) + del parameter_50, reshape_304 + + # pd_op.add: (-1x144x512xf32) <- (-1x144x512xf32, 512xf32) + add_154 = paddle._C_ops.add(matmul_125, parameter_49) + del matmul_125, parameter_49 + + # pd_op.reshape: (-1x12x12x512xf32) <- (-1x144x512xf32, 4xi64) + reshape_305 = paddle._C_ops.reshape(add_154, full_int_array_38) + del add_154 + + # pd_op.reshape: (-1x2x2x12x12x512xf32) <- (-1x12x12x512xf32, 6xi64) + reshape_306 = paddle._C_ops.reshape(reshape_305, full_int_array_40) + del reshape_305 + + # pd_op.transpose: (-1x2x12x2x12x512xf32) <- (-1x2x2x12x12x512xf32) + transpose_136 = paddle._C_ops.transpose(reshape_306, [0, 1, 3, 2, 4, 5]) + del reshape_306 + + # pd_op.reshape: (-1x24x24x512xf32) <- (-1x2x12x2x12x512xf32, 4xi64) + reshape_307 = paddle._C_ops.reshape(transpose_136, full_int_array_41) + del transpose_136 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_132 = [slice_138, full_30, full_18] + del slice_138 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_130 = paddle._C_ops.stack(combine_132, 0) + del combine_132 + + # pd_op.reshape: (-1x576x512xf32) <- (-1x24x24x512xf32, 3xi64) + reshape_308 = paddle._C_ops.reshape(reshape_307, stack_130) + del reshape_307, stack_130 + + # pd_op.add: (-1x576x512xf32) <- (-1x576x512xf32, -1x576x512xf32) + add_155 = paddle._C_ops.add(add_151, reshape_308) + del add_151, reshape_308 + + # pd_op.layer_norm: (-1x576x512xf32, -1x576xf32, -1x576xf32) <- (-1x576x512xf32, 512xf32, 512xf32) + layer_norm_132, layer_norm_133, layer_norm_134 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_155, parameter_48, parameter_47, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_47, parameter_48 + + # pd_op.matmul: (-1x576x2048xf32) <- (-1x576x512xf32, 512x2048xf32) + matmul_126 = paddle._C_ops.matmul(layer_norm_132, parameter_46, False, False) + del layer_norm_132, parameter_46 + + # pd_op.add: (-1x576x2048xf32) <- (-1x576x2048xf32, 2048xf32) + add_156 = paddle._C_ops.add(matmul_126, parameter_45) + del matmul_126, parameter_45 + + # pd_op.gelu: (-1x576x2048xf32) <- (-1x576x2048xf32) + gelu_20 = paddle._C_ops.gelu(add_156, False) + del add_156 + + # pd_op.matmul: (-1x576x512xf32) <- (-1x576x2048xf32, 2048x512xf32) + matmul_127 = paddle._C_ops.matmul(gelu_20, parameter_44, False, False) + del gelu_20, parameter_44 + + # pd_op.add: (-1x576x512xf32) <- (-1x576x512xf32, 512xf32) + add_157 = paddle._C_ops.add(matmul_127, parameter_43) + del matmul_127, parameter_43 + + # pd_op.add: (-1x576x512xf32) <- (-1x576x512xf32, -1x576x512xf32) + add_158 = paddle._C_ops.add(add_155, add_157) + del add_155, add_157 + + # pd_op.shape64: (3xi64) <- (-1x576x512xf32) + shape64_81 = paddle._C_ops.shape64(add_158) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_144 = paddle._C_ops.slice( + shape64_81, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_81 + + # pd_op.layer_norm: (-1x576x512xf32, -1x576xf32, -1x576xf32) <- (-1x576x512xf32, 512xf32, 512xf32) + layer_norm_135, layer_norm_136, layer_norm_137 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_158, parameter_42, parameter_41, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_41, parameter_42 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_133 = [slice_144, full_28, full_28, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_131 = paddle._C_ops.stack(combine_133, 0) + del combine_133 + + # pd_op.reshape: (-1x24x24x512xf32) <- (-1x576x512xf32, 4xi64) + reshape_309 = paddle._C_ops.reshape(layer_norm_135, stack_131) + del layer_norm_135, stack_131 + + # pd_op.shape64: (4xi64) <- (-1x24x24x512xf32) + shape64_82 = paddle._C_ops.shape64(reshape_309) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_145 = paddle._C_ops.slice( + shape64_82, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_82 + + # pd_op.roll: (-1x24x24x512xf32) <- (-1x24x24x512xf32, 2xi64) + roll_20 = paddle._C_ops.roll(reshape_309, full_int_array_11, [1, 2]) + del reshape_309 + + # pd_op.shape64: (4xi64) <- (-1x24x24x512xf32) + shape64_83 = paddle._C_ops.shape64(roll_20) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_146 = paddle._C_ops.slice( + shape64_83, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_83 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_134 = [slice_146, full_29, full_3, full_29, full_3, full_18] + del full_29, slice_146 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_132 = paddle._C_ops.stack(combine_134, 0) + del combine_134 + + # pd_op.reshape: (-1x2x12x2x12x512xf32) <- (-1x24x24x512xf32, 6xi64) + reshape_310 = paddle._C_ops.reshape(roll_20, stack_132) + del roll_20, stack_132 + + # pd_op.transpose: (-1x2x2x12x12x512xf32) <- (-1x2x12x2x12x512xf32) + transpose_137 = paddle._C_ops.transpose(reshape_310, [0, 1, 3, 2, 4, 5]) + del reshape_310 + + # pd_op.reshape: (-1x12x12x512xf32) <- (-1x2x2x12x12x512xf32, 4xi64) + reshape_311 = paddle._C_ops.reshape(transpose_137, full_int_array_38) + del transpose_137 + + # pd_op.reshape: (-1x144x512xf32) <- (-1x12x12x512xf32, 3xi64) + reshape_312 = paddle._C_ops.reshape(reshape_311, full_int_array_39) + del full_int_array_39, reshape_311 + + # pd_op.full: (1x24x24x1xf32) <- () + full_42 = paddle._C_ops.full( + [1, 24, 24, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__92 = paddle._C_ops.set_value_( + full_42, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_42 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__93 = paddle._C_ops.set_value_( + set_value__92, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__92 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__94 = paddle._C_ops.set_value_( + set_value__93, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__93 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__95 = paddle._C_ops.set_value_( + set_value__94, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__94 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__96 = paddle._C_ops.set_value_( + set_value__95, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__95 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__97 = paddle._C_ops.set_value_( + set_value__96, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__96 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__98 = paddle._C_ops.set_value_( + set_value__97, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__97 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__99 = paddle._C_ops.set_value_( + set_value__98, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__98 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__10 = paddle._C_ops.set_value_( + set_value__99, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__99 + + # pd_op.reshape: (1x2x12x2x12x1xf32) <- (1x24x24x1xf32, 6xi64) + reshape_313 = paddle._C_ops.reshape(set_value__10, full_int_array_42) + del full_int_array_42 + + # pd_op.transpose: (1x2x2x12x12x1xf32) <- (1x2x12x2x12x1xf32) + transpose_138 = paddle._C_ops.transpose(reshape_313, [0, 1, 3, 2, 4, 5]) + del reshape_313 + + # pd_op.reshape: (4x12x12x1xf32) <- (1x2x2x12x12x1xf32, 4xi64) + reshape_314 = paddle._C_ops.reshape(transpose_138, full_int_array_27) + del transpose_138 + + # pd_op.reshape: (4x144xf32) <- (4x12x12x1xf32, 2xi64) + reshape_315 = paddle._C_ops.reshape(reshape_314, full_int_array_28) + del reshape_314 + + # pd_op.unsqueeze: (4x1x144xf32) <- (4x144xf32, 1xi64) + unsqueeze_61 = paddle._C_ops.unsqueeze(reshape_315, full_int_array_1) + + # pd_op.unsqueeze: (4x144x1xf32) <- (4x144xf32, 1xi64) + unsqueeze_62 = paddle._C_ops.unsqueeze(reshape_315, full_int_array_5) + del reshape_315 + + # pd_op.subtract: (4x144x144xf32) <- (4x1x144xf32, 4x144x1xf32) + subtract_10 = paddle._C_ops.subtract(unsqueeze_61, unsqueeze_62) + del unsqueeze_61, unsqueeze_62 + + # pd_op.not_equal: (4x144x144xb) <- (4x144x144xf32, xf32) + not_equal_10 = paddle._C_ops.not_equal(subtract_10, full_11) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_20 = paddle._C_ops.where(not_equal_10, full_32, subtract_10) + del full_32, not_equal_10, subtract_10 + + # pd_op.equal: (4x144x144xb) <- (4x144x144xf32, xf32) + equal_10 = paddle._C_ops.equal(where_20, full_11) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_21 = paddle._C_ops.where(equal_10, full_33, where_20) + del equal_10, full_33, where_20 + + # pd_op.shape64: (3xi64) <- (-1x144x512xf32) + shape64_84 = paddle._C_ops.shape64(reshape_312) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_147 = paddle._C_ops.slice( + shape64_84, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_84 + + # pd_op.matmul: (-1x144x1536xf32) <- (-1x144x512xf32, 512x1536xf32) + matmul_128 = paddle._C_ops.matmul(reshape_312, parameter_40, False, False) + del parameter_40, reshape_312 + + # pd_op.add: (-1x144x1536xf32) <- (-1x144x1536xf32, 1536xf32) + add_159 = paddle._C_ops.add(matmul_128, parameter_39) + del matmul_128, parameter_39 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_135 = [slice_147, full_4, full_5, full_26, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_133 = paddle._C_ops.stack(combine_135, 0) + del combine_135 + + # pd_op.reshape: (-1x144x3x16x32xf32) <- (-1x144x1536xf32, 5xi64) + reshape_316 = paddle._C_ops.reshape(add_159, stack_133) + del add_159, stack_133 + + # pd_op.transpose: (3x-1x16x144x32xf32) <- (-1x144x3x16x32xf32) + transpose_139 = paddle._C_ops.transpose(reshape_316, [2, 0, 3, 1, 4]) + del reshape_316 + + # pd_op.slice: (-1x16x144x32xf32) <- (3x-1x16x144x32xf32, 1xi64, 1xi64) + slice_148 = paddle._C_ops.slice( + transpose_139, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x16x144x32xf32) <- (3x-1x16x144x32xf32, 1xi64, 1xi64) + slice_149 = paddle._C_ops.slice( + transpose_139, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x16x144x32xf32) <- (3x-1x16x144x32xf32, 1xi64, 1xi64) + slice_150 = paddle._C_ops.slice( + transpose_139, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_139 + + # pd_op.scale: (-1x16x144x32xf32) <- (-1x16x144x32xf32, 1xf32) + scale_21 = paddle._C_ops.scale(slice_148, full_8, float("0"), True) + del slice_148 + + # pd_op.transpose: (-1x16x32x144xf32) <- (-1x16x144x32xf32) + transpose_140 = paddle._C_ops.transpose(slice_149, [0, 1, 3, 2]) + del slice_149 + + # pd_op.matmul: (-1x16x144x144xf32) <- (-1x16x144x32xf32, -1x16x32x144xf32) + matmul_129 = paddle._C_ops.matmul(scale_21, transpose_140, False, False) + del scale_21, transpose_140 + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_317 = paddle._C_ops.reshape(data_43, full_int_array_7) + del data_43 + + # pd_op.index_select: (20736x16xf32) <- (529x16xf32, 20736xi64) + index_select_21 = paddle._C_ops.index_select(data_44, reshape_317, 0) + del data_44, reshape_317 + + # pd_op.reshape: (144x144x16xf32) <- (20736x16xf32, 3xi64) + reshape_318 = paddle._C_ops.reshape(index_select_21, full_int_array_8) + del index_select_21 + + # pd_op.transpose: (16x144x144xf32) <- (144x144x16xf32) + transpose_141 = paddle._C_ops.transpose(reshape_318, [2, 0, 1]) + del reshape_318 + + # pd_op.unsqueeze: (1x16x144x144xf32) <- (16x144x144xf32, 1xi64) + unsqueeze_63 = paddle._C_ops.unsqueeze(transpose_141, full_int_array_0) + del transpose_141 + + # pd_op.add: (-1x16x144x144xf32) <- (-1x16x144x144xf32, 1x16x144x144xf32) + add_160 = paddle._C_ops.add(matmul_129, unsqueeze_63) + del matmul_129, unsqueeze_63 + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_10 = paddle._C_ops.floor_divide(slice_147, full_34) + del full_34 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_136 = [floor_divide_10, full_6, full_26, full_4, full_4] + del floor_divide_10, full_6 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_134 = paddle._C_ops.stack(combine_136, 0) + del combine_136 + + # pd_op.reshape: (-1x4x16x144x144xf32) <- (-1x16x144x144xf32, 5xi64) + reshape_319 = paddle._C_ops.reshape(add_160, stack_134) + del add_160, stack_134 + + # pd_op.unsqueeze: (4x1x144x144xf32) <- (4x144x144xf32, 1xi64) + unsqueeze_64 = paddle._C_ops.unsqueeze(where_21, full_int_array_1) + del where_21 + + # pd_op.unsqueeze: (1x4x1x144x144xf32) <- (4x1x144x144xf32, 1xi64) + unsqueeze_65 = paddle._C_ops.unsqueeze(unsqueeze_64, full_int_array_0) + del unsqueeze_64 + + # pd_op.add: (-1x4x16x144x144xf32) <- (-1x4x16x144x144xf32, 1x4x1x144x144xf32) + add_161 = paddle._C_ops.add(reshape_319, unsqueeze_65) + del reshape_319, unsqueeze_65 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_137 = [slice_147, full_26, full_4, full_4] + del full_26 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_135 = paddle._C_ops.stack(combine_137, 0) + del combine_137 + + # pd_op.reshape: (-1x16x144x144xf32) <- (-1x4x16x144x144xf32, 4xi64) + reshape_320 = paddle._C_ops.reshape(add_161, stack_135) + del add_161, stack_135 + + # pd_op.softmax: (-1x16x144x144xf32) <- (-1x16x144x144xf32) + softmax_21 = paddle._C_ops.softmax(reshape_320, -1) + del reshape_320 + + # pd_op.matmul: (-1x16x144x32xf32) <- (-1x16x144x144xf32, -1x16x144x32xf32) + matmul_130 = paddle._C_ops.matmul(softmax_21, slice_150, False, False) + del slice_150, softmax_21 + + # pd_op.transpose: (-1x144x16x32xf32) <- (-1x16x144x32xf32) + transpose_142 = paddle._C_ops.transpose(matmul_130, [0, 2, 1, 3]) + del matmul_130 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_138 = [slice_147, full_4, full_18] + del slice_147 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_136 = paddle._C_ops.stack(combine_138, 0) + del combine_138 + + # pd_op.reshape: (-1x144x512xf32) <- (-1x144x16x32xf32, 3xi64) + reshape_321 = paddle._C_ops.reshape(transpose_142, stack_136) + del stack_136, transpose_142 + + # pd_op.matmul: (-1x144x512xf32) <- (-1x144x512xf32, 512x512xf32) + matmul_131 = paddle._C_ops.matmul(reshape_321, parameter_38, False, False) + del parameter_38, reshape_321 + + # pd_op.add: (-1x144x512xf32) <- (-1x144x512xf32, 512xf32) + add_162 = paddle._C_ops.add(matmul_131, parameter_37) + del matmul_131, parameter_37 + + # pd_op.reshape: (-1x12x12x512xf32) <- (-1x144x512xf32, 4xi64) + reshape_322 = paddle._C_ops.reshape(add_162, full_int_array_38) + del add_162, full_int_array_38 + + # pd_op.reshape: (-1x2x2x12x12x512xf32) <- (-1x12x12x512xf32, 6xi64) + reshape_323 = paddle._C_ops.reshape(reshape_322, full_int_array_40) + del full_int_array_40, reshape_322 + + # pd_op.transpose: (-1x2x12x2x12x512xf32) <- (-1x2x2x12x12x512xf32) + transpose_143 = paddle._C_ops.transpose(reshape_323, [0, 1, 3, 2, 4, 5]) + del reshape_323 + + # pd_op.reshape: (-1x24x24x512xf32) <- (-1x2x12x2x12x512xf32, 4xi64) + reshape_324 = paddle._C_ops.reshape(transpose_143, full_int_array_41) + del full_int_array_41, transpose_143 + + # pd_op.roll: (-1x24x24x512xf32) <- (-1x24x24x512xf32, 2xi64) + roll_21 = paddle._C_ops.roll(reshape_324, full_int_array_29, [1, 2]) + del reshape_324 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_139 = [slice_144, full_30, full_18] + del full_30, slice_144 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_137 = paddle._C_ops.stack(combine_139, 0) + del combine_139 + + # pd_op.reshape: (-1x576x512xf32) <- (-1x24x24x512xf32, 3xi64) + reshape_325 = paddle._C_ops.reshape(roll_21, stack_137) + del roll_21, stack_137 + + # pd_op.add: (-1x576x512xf32) <- (-1x576x512xf32, -1x576x512xf32) + add_163 = paddle._C_ops.add(add_158, reshape_325) + del add_158, reshape_325 + + # pd_op.layer_norm: (-1x576x512xf32, -1x576xf32, -1x576xf32) <- (-1x576x512xf32, 512xf32, 512xf32) + layer_norm_138, layer_norm_139, layer_norm_140 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_163, parameter_36, parameter_35, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_35, parameter_36 + + # pd_op.matmul: (-1x576x2048xf32) <- (-1x576x512xf32, 512x2048xf32) + matmul_132 = paddle._C_ops.matmul(layer_norm_138, parameter_34, False, False) + del layer_norm_138, parameter_34 + + # pd_op.add: (-1x576x2048xf32) <- (-1x576x2048xf32, 2048xf32) + add_164 = paddle._C_ops.add(matmul_132, parameter_33) + del matmul_132, parameter_33 + + # pd_op.gelu: (-1x576x2048xf32) <- (-1x576x2048xf32) + gelu_21 = paddle._C_ops.gelu(add_164, False) + del add_164 + + # pd_op.matmul: (-1x576x512xf32) <- (-1x576x2048xf32, 2048x512xf32) + matmul_133 = paddle._C_ops.matmul(gelu_21, parameter_32, False, False) + del gelu_21, parameter_32 + + # pd_op.add: (-1x576x512xf32) <- (-1x576x512xf32, 512xf32) + add_165 = paddle._C_ops.add(matmul_133, parameter_31) + del matmul_133, parameter_31 + + # pd_op.add: (-1x576x512xf32) <- (-1x576x512xf32, -1x576x512xf32) + add_166 = paddle._C_ops.add(add_163, add_165) + del add_163, add_165 + + # pd_op.shape64: (3xi64) <- (-1x576x512xf32) + shape64_85 = paddle._C_ops.shape64(add_166) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_151 = paddle._C_ops.slice( + shape64_85, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_85 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_140 = [slice_151, full_28, full_28, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_138 = paddle._C_ops.stack(combine_140, 0) + del combine_140 + + # pd_op.reshape: (-1x24x24x512xf32) <- (-1x576x512xf32, 4xi64) + reshape_326 = paddle._C_ops.reshape(add_166, stack_138) + del add_166, stack_138 + + # pd_op.strided_slice: (-1x12x12x512xf32) <- (-1x24x24x512xf32, 2xi64, 2xi64, 2xi64) + strided_slice_8 = paddle._C_ops.strided_slice( + reshape_326, [1, 2], full_int_array_12, full_int_array_25, full_int_array_30 + ) + + # pd_op.strided_slice: (-1x12x12x512xf32) <- (-1x24x24x512xf32, 2xi64, 2xi64, 2xi64) + strided_slice_9 = paddle._C_ops.strided_slice( + reshape_326, [1, 2], full_int_array_31, full_int_array_25, full_int_array_30 + ) + del full_int_array_31 + + # pd_op.strided_slice: (-1x12x12x512xf32) <- (-1x24x24x512xf32, 2xi64, 2xi64, 2xi64) + strided_slice_10 = paddle._C_ops.strided_slice( + reshape_326, [1, 2], full_int_array_32, full_int_array_25, full_int_array_30 + ) + del full_int_array_32 + + # pd_op.strided_slice: (-1x12x12x512xf32) <- (-1x24x24x512xf32, 2xi64, 2xi64, 2xi64) + strided_slice_11 = paddle._C_ops.strided_slice( + reshape_326, [1, 2], full_int_array_14, full_int_array_25, full_int_array_30 + ) + del full_int_array_30 + + # pd_op.shape64: (4xi64) <- (-1x24x24x512xf32) + shape64_86 = paddle._C_ops.shape64(reshape_326) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_152 = paddle._C_ops.slice( + shape64_86, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_86 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_141 = [slice_152, full_28, full_28, full_18] + del full_18, full_28, slice_152 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_139 = paddle._C_ops.stack(combine_141, 0) + del combine_141 + + # pd_op.reshape: (-1x24x24x512xf32) <- (-1x24x24x512xf32, 4xi64) + reshape_327 = paddle._C_ops.reshape(reshape_326, stack_139) + del reshape_326, stack_139 + + # builtin.combine: ([-1x12x12x512xf32, -1x12x12x512xf32, -1x12x12x512xf32, -1x12x12x512xf32]) <- (-1x12x12x512xf32, -1x12x12x512xf32, -1x12x12x512xf32, -1x12x12x512xf32) + combine_142 = [ + strided_slice_8, + strided_slice_9, + strided_slice_10, + strided_slice_11, + ] + del strided_slice_10, strided_slice_11, strided_slice_8, strided_slice_9 + + # pd_op.concat: (-1x12x12x2048xf32) <- ([-1x12x12x512xf32, -1x12x12x512xf32, -1x12x12x512xf32, -1x12x12x512xf32], 1xi32) + concat_2 = paddle._C_ops.concat(combine_142, full_16) + del combine_142, full_16 + + # pd_op.full: (xi64) <- () + full_43 = paddle._C_ops.full( + [], float("2048"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_143 = [slice_151, full_17, full_43] + del full_17, full_43, slice_151 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_140 = paddle._C_ops.stack(combine_143, 0) + del combine_143 + + # pd_op.reshape: (-1x-1x2048xf32) <- (-1x12x12x2048xf32, 3xi64) + reshape_328 = paddle._C_ops.reshape(concat_2, stack_140) + del concat_2, stack_140 + + # pd_op.layer_norm: (-1x-1x2048xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x2048xf32, 2048xf32, 2048xf32) + layer_norm_141, layer_norm_142, layer_norm_143 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + reshape_328, parameter_30, parameter_29, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_29, parameter_30, reshape_328 + + # pd_op.matmul: (-1x-1x1024xf32) <- (-1x-1x2048xf32, 2048x1024xf32) + matmul_134 = paddle._C_ops.matmul(layer_norm_141, parameter_28, False, False) + del layer_norm_141, parameter_28 + + # pd_op.shape64: (3xi64) <- (-1x-1x1024xf32) + shape64_87 = paddle._C_ops.shape64(matmul_134) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_153 = paddle._C_ops.slice( + shape64_87, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_87 + + # pd_op.shape64: (3xi64) <- (-1x-1x1024xf32) + shape64_88 = paddle._C_ops.shape64(matmul_134) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_154 = paddle._C_ops.slice( + shape64_88, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + del shape64_88 + + # pd_op.layer_norm: (-1x-1x1024xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x1024xf32, 1024xf32, 1024xf32) + layer_norm_144, layer_norm_145, layer_norm_146 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + matmul_134, parameter_27, parameter_26, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_26, parameter_27 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_144 = [slice_153, full_3, full_3, full_27] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_141 = paddle._C_ops.stack(combine_144, 0) + del combine_144 + + # pd_op.reshape: (-1x12x12x1024xf32) <- (-1x-1x1024xf32, 4xi64) + reshape_329 = paddle._C_ops.reshape(layer_norm_144, stack_141) + del layer_norm_144, stack_141 + + # pd_op.shape64: (4xi64) <- (-1x12x12x1024xf32) + shape64_89 = paddle._C_ops.shape64(reshape_329) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_155 = paddle._C_ops.slice( + shape64_89, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_89 + + # pd_op.full: (xi64) <- () + full_44 = paddle._C_ops.full( + [], float("1"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_145 = [slice_155, full_44, full_3, full_44, full_3, full_27] + del slice_155 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_142 = paddle._C_ops.stack(combine_145, 0) + del combine_145 + + # pd_op.reshape: (-1x1x12x1x12x1024xf32) <- (-1x12x12x1024xf32, 6xi64) + reshape_330 = paddle._C_ops.reshape(reshape_329, stack_142) + del reshape_329, stack_142 + + # pd_op.transpose: (-1x1x1x12x12x1024xf32) <- (-1x1x12x1x12x1024xf32) + transpose_144 = paddle._C_ops.transpose(reshape_330, [0, 1, 3, 2, 4, 5]) + del reshape_330 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_43 = [-1, 12, 12, 1024] + + # pd_op.reshape: (-1x12x12x1024xf32) <- (-1x1x1x12x12x1024xf32, 4xi64) + reshape_331 = paddle._C_ops.reshape(transpose_144, full_int_array_43) + del transpose_144 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_44 = [-1, 144, 1024] + + # pd_op.reshape: (-1x144x1024xf32) <- (-1x12x12x1024xf32, 3xi64) + reshape_332 = paddle._C_ops.reshape(reshape_331, full_int_array_44) + del reshape_331 + + # pd_op.shape64: (3xi64) <- (-1x144x1024xf32) + shape64_90 = paddle._C_ops.shape64(reshape_332) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_156 = paddle._C_ops.slice( + shape64_90, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_90 + + # pd_op.matmul: (-1x144x3072xf32) <- (-1x144x1024xf32, 1024x3072xf32) + matmul_135 = paddle._C_ops.matmul(reshape_332, parameter_25, False, False) + del parameter_25, reshape_332 + + # pd_op.add: (-1x144x3072xf32) <- (-1x144x3072xf32, 3072xf32) + add_167 = paddle._C_ops.add(matmul_135, parameter_24) + del matmul_135, parameter_24 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_146 = [slice_156, full_4, full_5, full_7, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_143 = paddle._C_ops.stack(combine_146, 0) + del combine_146 + + # pd_op.reshape: (-1x144x3x32x32xf32) <- (-1x144x3072xf32, 5xi64) + reshape_333 = paddle._C_ops.reshape(add_167, stack_143) + del add_167, stack_143 + + # pd_op.transpose: (3x-1x32x144x32xf32) <- (-1x144x3x32x32xf32) + transpose_145 = paddle._C_ops.transpose(reshape_333, [2, 0, 3, 1, 4]) + del reshape_333 + + # pd_op.slice: (-1x32x144x32xf32) <- (3x-1x32x144x32xf32, 1xi64, 1xi64) + slice_157 = paddle._C_ops.slice( + transpose_145, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x32x144x32xf32) <- (3x-1x32x144x32xf32, 1xi64, 1xi64) + slice_158 = paddle._C_ops.slice( + transpose_145, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x32x144x32xf32) <- (3x-1x32x144x32xf32, 1xi64, 1xi64) + slice_159 = paddle._C_ops.slice( + transpose_145, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_145 + + # pd_op.scale: (-1x32x144x32xf32) <- (-1x32x144x32xf32, 1xf32) + scale_22 = paddle._C_ops.scale(slice_157, full_8, float("0"), True) + del slice_157 + + # pd_op.transpose: (-1x32x32x144xf32) <- (-1x32x144x32xf32) + transpose_146 = paddle._C_ops.transpose(slice_158, [0, 1, 3, 2]) + del slice_158 + + # pd_op.matmul: (-1x32x144x144xf32) <- (-1x32x144x32xf32, -1x32x32x144xf32) + matmul_136 = paddle._C_ops.matmul(scale_22, transpose_146, False, False) + del scale_22, transpose_146 + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_334 = paddle._C_ops.reshape(data_45, full_int_array_7) + del data_45 + + # pd_op.index_select: (20736x32xf32) <- (529x32xf32, 20736xi64) + index_select_22 = paddle._C_ops.index_select(data_46, reshape_334, 0) + del data_46, reshape_334 + + # pd_op.reshape: (144x144x32xf32) <- (20736x32xf32, 3xi64) + reshape_335 = paddle._C_ops.reshape(index_select_22, full_int_array_8) + del index_select_22 + + # pd_op.transpose: (32x144x144xf32) <- (144x144x32xf32) + transpose_147 = paddle._C_ops.transpose(reshape_335, [2, 0, 1]) + del reshape_335 + + # pd_op.unsqueeze: (1x32x144x144xf32) <- (32x144x144xf32, 1xi64) + unsqueeze_66 = paddle._C_ops.unsqueeze(transpose_147, full_int_array_0) + del transpose_147 + + # pd_op.add: (-1x32x144x144xf32) <- (-1x32x144x144xf32, 1x32x144x144xf32) + add_168 = paddle._C_ops.add(matmul_136, unsqueeze_66) + del matmul_136, unsqueeze_66 + + # pd_op.softmax: (-1x32x144x144xf32) <- (-1x32x144x144xf32) + softmax_22 = paddle._C_ops.softmax(add_168, -1) + del add_168 + + # pd_op.matmul: (-1x32x144x32xf32) <- (-1x32x144x144xf32, -1x32x144x32xf32) + matmul_137 = paddle._C_ops.matmul(softmax_22, slice_159, False, False) + del slice_159, softmax_22 + + # pd_op.transpose: (-1x144x32x32xf32) <- (-1x32x144x32xf32) + transpose_148 = paddle._C_ops.transpose(matmul_137, [0, 2, 1, 3]) + del matmul_137 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_147 = [slice_156, full_4, full_27] + del slice_156 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_144 = paddle._C_ops.stack(combine_147, 0) + del combine_147 + + # pd_op.reshape: (-1x144x1024xf32) <- (-1x144x32x32xf32, 3xi64) + reshape_336 = paddle._C_ops.reshape(transpose_148, stack_144) + del stack_144, transpose_148 + + # pd_op.matmul: (-1x144x1024xf32) <- (-1x144x1024xf32, 1024x1024xf32) + matmul_138 = paddle._C_ops.matmul(reshape_336, parameter_23, False, False) + del parameter_23, reshape_336 + + # pd_op.add: (-1x144x1024xf32) <- (-1x144x1024xf32, 1024xf32) + add_169 = paddle._C_ops.add(matmul_138, parameter_22) + del matmul_138, parameter_22 + + # pd_op.reshape: (-1x12x12x1024xf32) <- (-1x144x1024xf32, 4xi64) + reshape_337 = paddle._C_ops.reshape(add_169, full_int_array_43) + del add_169 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_45 = [-1, 1, 1, 12, 12, 1024] + + # pd_op.reshape: (-1x1x1x12x12x1024xf32) <- (-1x12x12x1024xf32, 6xi64) + reshape_338 = paddle._C_ops.reshape(reshape_337, full_int_array_45) + del reshape_337 + + # pd_op.transpose: (-1x1x12x1x12x1024xf32) <- (-1x1x1x12x12x1024xf32) + transpose_149 = paddle._C_ops.transpose(reshape_338, [0, 1, 3, 2, 4, 5]) + del reshape_338 + + # pd_op.reshape: (-1x12x12x1024xf32) <- (-1x1x12x1x12x1024xf32, 4xi64) + reshape_339 = paddle._C_ops.reshape(transpose_149, full_int_array_43) + del transpose_149 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_148 = [slice_153, full_4, full_27] + del slice_153 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_145 = paddle._C_ops.stack(combine_148, 0) + del combine_148 + + # pd_op.reshape: (-1x144x1024xf32) <- (-1x12x12x1024xf32, 3xi64) + reshape_340 = paddle._C_ops.reshape(reshape_339, stack_145) + del reshape_339, stack_145 + + # pd_op.add: (-1x144x1024xf32) <- (-1x-1x1024xf32, -1x144x1024xf32) + add_170 = paddle._C_ops.add(matmul_134, reshape_340) + del matmul_134, reshape_340 + + # pd_op.layer_norm: (-1x144x1024xf32, -1x144xf32, -1x144xf32) <- (-1x144x1024xf32, 1024xf32, 1024xf32) + layer_norm_147, layer_norm_148, layer_norm_149 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_170, parameter_21, parameter_20, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_20, parameter_21 + + # pd_op.matmul: (-1x144x4096xf32) <- (-1x144x1024xf32, 1024x4096xf32) + matmul_139 = paddle._C_ops.matmul(layer_norm_147, parameter_19, False, False) + del layer_norm_147, parameter_19 + + # pd_op.add: (-1x144x4096xf32) <- (-1x144x4096xf32, 4096xf32) + add_171 = paddle._C_ops.add(matmul_139, parameter_18) + del matmul_139, parameter_18 + + # pd_op.gelu: (-1x144x4096xf32) <- (-1x144x4096xf32) + gelu_22 = paddle._C_ops.gelu(add_171, False) + del add_171 + + # pd_op.matmul: (-1x144x1024xf32) <- (-1x144x4096xf32, 4096x1024xf32) + matmul_140 = paddle._C_ops.matmul(gelu_22, parameter_17, False, False) + del gelu_22, parameter_17 + + # pd_op.add: (-1x144x1024xf32) <- (-1x144x1024xf32, 1024xf32) + add_172 = paddle._C_ops.add(matmul_140, parameter_16) + del matmul_140, parameter_16 + + # pd_op.add: (-1x144x1024xf32) <- (-1x144x1024xf32, -1x144x1024xf32) + add_173 = paddle._C_ops.add(add_170, add_172) + del add_170, add_172 + + # pd_op.shape64: (3xi64) <- (-1x144x1024xf32) + shape64_91 = paddle._C_ops.shape64(add_173) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_160 = paddle._C_ops.slice( + shape64_91, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_91 + + # pd_op.layer_norm: (-1x144x1024xf32, -1x144xf32, -1x144xf32) <- (-1x144x1024xf32, 1024xf32, 1024xf32) + layer_norm_150, layer_norm_151, layer_norm_152 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_173, parameter_15, parameter_14, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_14, parameter_15 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_149 = [slice_160, full_3, full_3, full_27] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_146 = paddle._C_ops.stack(combine_149, 0) + del combine_149 + + # pd_op.reshape: (-1x12x12x1024xf32) <- (-1x144x1024xf32, 4xi64) + reshape_341 = paddle._C_ops.reshape(layer_norm_150, stack_146) + del layer_norm_150, stack_146 + + # pd_op.shape64: (4xi64) <- (-1x12x12x1024xf32) + shape64_92 = paddle._C_ops.shape64(reshape_341) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_161 = paddle._C_ops.slice( + shape64_92, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_92 + + # pd_op.roll: (-1x12x12x1024xf32) <- (-1x12x12x1024xf32, 2xi64) + roll_22 = paddle._C_ops.roll(reshape_341, full_int_array_11, [1, 2]) + del reshape_341 + + # pd_op.shape64: (4xi64) <- (-1x12x12x1024xf32) + shape64_93 = paddle._C_ops.shape64(roll_22) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_162 = paddle._C_ops.slice( + shape64_93, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_93 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_150 = [slice_162, full_44, full_3, full_44, full_3, full_27] + del full_3, slice_162 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_147 = paddle._C_ops.stack(combine_150, 0) + del combine_150 + + # pd_op.reshape: (-1x1x12x1x12x1024xf32) <- (-1x12x12x1024xf32, 6xi64) + reshape_342 = paddle._C_ops.reshape(roll_22, stack_147) + del roll_22, stack_147 + + # pd_op.transpose: (-1x1x1x12x12x1024xf32) <- (-1x1x12x1x12x1024xf32) + transpose_150 = paddle._C_ops.transpose(reshape_342, [0, 1, 3, 2, 4, 5]) + del reshape_342 + + # pd_op.reshape: (-1x12x12x1024xf32) <- (-1x1x1x12x12x1024xf32, 4xi64) + reshape_343 = paddle._C_ops.reshape(transpose_150, full_int_array_43) + del transpose_150 + + # pd_op.reshape: (-1x144x1024xf32) <- (-1x12x12x1024xf32, 3xi64) + reshape_344 = paddle._C_ops.reshape(reshape_343, full_int_array_44) + del full_int_array_44, reshape_343 + + # pd_op.full: (1x12x12x1xf32) <- () + full_45 = paddle._C_ops.full( + [1, 12, 12, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x12x12x1xf32) <- (1x12x12x1xf32, 2xi64, 2xi64, 2xi64) + set_value__100 = paddle._C_ops.set_value_( + full_45, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_45, full_int_array_12 + + # pd_op.set_value_: (1x12x12x1xf32) <- (1x12x12x1xf32, 2xi64, 2xi64, 2xi64) + set_value__101 = paddle._C_ops.set_value_( + set_value__100, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del full_int_array_15, set_value__100 + + # pd_op.set_value_: (1x12x12x1xf32) <- (1x12x12x1xf32, 2xi64, 2xi64, 2xi64) + set_value__102 = paddle._C_ops.set_value_( + set_value__101, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del full_int_array_17, full_int_array_18, set_value__101 + + # pd_op.set_value_: (1x12x12x1xf32) <- (1x12x12x1xf32, 2xi64, 2xi64, 2xi64) + set_value__103 = paddle._C_ops.set_value_( + set_value__102, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del full_int_array_19, set_value__102 + + # pd_op.set_value_: (1x12x12x1xf32) <- (1x12x12x1xf32, 2xi64, 2xi64, 2xi64) + set_value__104 = paddle._C_ops.set_value_( + set_value__103, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del full_int_array_13, set_value__103 + + # pd_op.set_value_: (1x12x12x1xf32) <- (1x12x12x1xf32, 2xi64, 2xi64, 2xi64) + set_value__105 = paddle._C_ops.set_value_( + set_value__104, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del full_int_array_16, full_int_array_21, set_value__104 + + # pd_op.set_value_: (1x12x12x1xf32) <- (1x12x12x1xf32, 2xi64, 2xi64, 2xi64) + set_value__106 = paddle._C_ops.set_value_( + set_value__105, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del full_int_array_22, full_int_array_23, set_value__105 + + # pd_op.set_value_: (1x12x12x1xf32) <- (1x12x12x1xf32, 2xi64, 2xi64, 2xi64) + set_value__107 = paddle._C_ops.set_value_( + set_value__106, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del full_int_array_20, full_int_array_24, set_value__106 + + # pd_op.set_value_: (1x12x12x1xf32) <- (1x12x12x1xf32, 2xi64, 2xi64, 2xi64) + set_value__11 = paddle._C_ops.set_value_( + set_value__107, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del full_int_array_11, full_int_array_25, set_value__107 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_46 = [1, 1, 12, 1, 12, 1] + + # pd_op.reshape: (1x1x12x1x12x1xf32) <- (1x12x12x1xf32, 6xi64) + reshape_345 = paddle._C_ops.reshape(set_value__11, full_int_array_46) + del full_int_array_46 + + # pd_op.transpose: (1x1x1x12x12x1xf32) <- (1x1x12x1x12x1xf32) + transpose_151 = paddle._C_ops.transpose(reshape_345, [0, 1, 3, 2, 4, 5]) + del reshape_345 + + # pd_op.reshape: (1x12x12x1xf32) <- (1x1x1x12x12x1xf32, 4xi64) + reshape_346 = paddle._C_ops.reshape(transpose_151, full_int_array_27) + del full_int_array_27, transpose_151 + + # pd_op.reshape: (1x144xf32) <- (1x12x12x1xf32, 2xi64) + reshape_347 = paddle._C_ops.reshape(reshape_346, full_int_array_28) + del full_int_array_28, reshape_346 + + # pd_op.unsqueeze: (1x1x144xf32) <- (1x144xf32, 1xi64) + unsqueeze_67 = paddle._C_ops.unsqueeze(reshape_347, full_int_array_1) + + # pd_op.unsqueeze: (1x144x1xf32) <- (1x144xf32, 1xi64) + unsqueeze_68 = paddle._C_ops.unsqueeze(reshape_347, full_int_array_5) + del reshape_347 + + # pd_op.subtract: (1x144x144xf32) <- (1x1x144xf32, 1x144x1xf32) + subtract_11 = paddle._C_ops.subtract(unsqueeze_67, unsqueeze_68) + del unsqueeze_67, unsqueeze_68 + + # pd_op.not_equal: (1x144x144xb) <- (1x144x144xf32, xf32) + not_equal_11 = paddle._C_ops.not_equal(subtract_11, full_11) + + # pd_op.full: (1x144x144xf32) <- () + full_46 = paddle._C_ops.full( + [1, 144, 144], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (1x144x144xf32) <- (1x144x144xb, 1x144x144xf32, 1x144x144xf32) + where_22 = paddle._C_ops.where(not_equal_11, full_46, subtract_11) + del full_46, not_equal_11, subtract_11 + + # pd_op.equal: (1x144x144xb) <- (1x144x144xf32, xf32) + equal_11 = paddle._C_ops.equal(where_22, full_11) + del full_11 + + # pd_op.full: (1x144x144xf32) <- () + full_47 = paddle._C_ops.full( + [1, 144, 144], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (1x144x144xf32) <- (1x144x144xb, 1x144x144xf32, 1x144x144xf32) + where_23 = paddle._C_ops.where(equal_11, full_47, where_22) + del equal_11, full_47, where_22 + + # pd_op.shape64: (3xi64) <- (-1x144x1024xf32) + shape64_94 = paddle._C_ops.shape64(reshape_344) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_163 = paddle._C_ops.slice( + shape64_94, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_94 + + # pd_op.matmul: (-1x144x3072xf32) <- (-1x144x1024xf32, 1024x3072xf32) + matmul_141 = paddle._C_ops.matmul(reshape_344, parameter_13, False, False) + del parameter_13, reshape_344 + + # pd_op.add: (-1x144x3072xf32) <- (-1x144x3072xf32, 3072xf32) + add_174 = paddle._C_ops.add(matmul_141, parameter_12) + del matmul_141, parameter_12 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_151 = [slice_163, full_4, full_5, full_7, full_7] + del full_5 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_148 = paddle._C_ops.stack(combine_151, 0) + del combine_151 + + # pd_op.reshape: (-1x144x3x32x32xf32) <- (-1x144x3072xf32, 5xi64) + reshape_348 = paddle._C_ops.reshape(add_174, stack_148) + del add_174, stack_148 + + # pd_op.transpose: (3x-1x32x144x32xf32) <- (-1x144x3x32x32xf32) + transpose_152 = paddle._C_ops.transpose(reshape_348, [2, 0, 3, 1, 4]) + del reshape_348 + + # pd_op.slice: (-1x32x144x32xf32) <- (3x-1x32x144x32xf32, 1xi64, 1xi64) + slice_164 = paddle._C_ops.slice( + transpose_152, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x32x144x32xf32) <- (3x-1x32x144x32xf32, 1xi64, 1xi64) + slice_165 = paddle._C_ops.slice( + transpose_152, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x32x144x32xf32) <- (3x-1x32x144x32xf32, 1xi64, 1xi64) + slice_166 = paddle._C_ops.slice( + transpose_152, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del full_int_array_6, transpose_152 + + # pd_op.scale: (-1x32x144x32xf32) <- (-1x32x144x32xf32, 1xf32) + scale_23 = paddle._C_ops.scale(slice_164, full_8, float("0"), True) + del full_8, slice_164 + + # pd_op.transpose: (-1x32x32x144xf32) <- (-1x32x144x32xf32) + transpose_153 = paddle._C_ops.transpose(slice_165, [0, 1, 3, 2]) + del slice_165 + + # pd_op.matmul: (-1x32x144x144xf32) <- (-1x32x144x32xf32, -1x32x32x144xf32) + matmul_142 = paddle._C_ops.matmul(scale_23, transpose_153, False, False) + del scale_23, transpose_153 + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_349 = paddle._C_ops.reshape(data_47, full_int_array_7) + del data_47, full_int_array_7 + + # pd_op.index_select: (20736x32xf32) <- (529x32xf32, 20736xi64) + index_select_23 = paddle._C_ops.index_select(data_48, reshape_349, 0) + del data_48, reshape_349 + + # pd_op.reshape: (144x144x32xf32) <- (20736x32xf32, 3xi64) + reshape_350 = paddle._C_ops.reshape(index_select_23, full_int_array_8) + del full_int_array_8, index_select_23 + + # pd_op.transpose: (32x144x144xf32) <- (144x144x32xf32) + transpose_154 = paddle._C_ops.transpose(reshape_350, [2, 0, 1]) + del reshape_350 + + # pd_op.unsqueeze: (1x32x144x144xf32) <- (32x144x144xf32, 1xi64) + unsqueeze_69 = paddle._C_ops.unsqueeze(transpose_154, full_int_array_0) + del transpose_154 + + # pd_op.add: (-1x32x144x144xf32) <- (-1x32x144x144xf32, 1x32x144x144xf32) + add_175 = paddle._C_ops.add(matmul_142, unsqueeze_69) + del matmul_142, unsqueeze_69 + + # pd_op.full: (xi64) <- () + full_48 = paddle._C_ops.full( + [], float("1"), paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_11 = paddle._C_ops.floor_divide(slice_163, full_48) + del full_48 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_152 = [floor_divide_11, full_44, full_7, full_4, full_4] + del floor_divide_11, full_44 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_149 = paddle._C_ops.stack(combine_152, 0) + del combine_152 + + # pd_op.reshape: (-1x1x32x144x144xf32) <- (-1x32x144x144xf32, 5xi64) + reshape_351 = paddle._C_ops.reshape(add_175, stack_149) + del add_175, stack_149 + + # pd_op.unsqueeze: (1x1x144x144xf32) <- (1x144x144xf32, 1xi64) + unsqueeze_70 = paddle._C_ops.unsqueeze(where_23, full_int_array_1) + del full_int_array_1, where_23 + + # pd_op.unsqueeze: (1x1x1x144x144xf32) <- (1x1x144x144xf32, 1xi64) + unsqueeze_71 = paddle._C_ops.unsqueeze(unsqueeze_70, full_int_array_0) + del full_int_array_0, unsqueeze_70 + + # pd_op.add: (-1x1x32x144x144xf32) <- (-1x1x32x144x144xf32, 1x1x1x144x144xf32) + add_176 = paddle._C_ops.add(reshape_351, unsqueeze_71) + del reshape_351, unsqueeze_71 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_153 = [slice_163, full_7, full_4, full_4] + del full_7 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_150 = paddle._C_ops.stack(combine_153, 0) + del combine_153 + + # pd_op.reshape: (-1x32x144x144xf32) <- (-1x1x32x144x144xf32, 4xi64) + reshape_352 = paddle._C_ops.reshape(add_176, stack_150) + del add_176, stack_150 + + # pd_op.softmax: (-1x32x144x144xf32) <- (-1x32x144x144xf32) + softmax_23 = paddle._C_ops.softmax(reshape_352, -1) + del reshape_352 + + # pd_op.matmul: (-1x32x144x32xf32) <- (-1x32x144x144xf32, -1x32x144x32xf32) + matmul_143 = paddle._C_ops.matmul(softmax_23, slice_166, False, False) + del slice_166, softmax_23 + + # pd_op.transpose: (-1x144x32x32xf32) <- (-1x32x144x32xf32) + transpose_155 = paddle._C_ops.transpose(matmul_143, [0, 2, 1, 3]) + del matmul_143 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_154 = [slice_163, full_4, full_27] + del slice_163 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_151 = paddle._C_ops.stack(combine_154, 0) + del combine_154 + + # pd_op.reshape: (-1x144x1024xf32) <- (-1x144x32x32xf32, 3xi64) + reshape_353 = paddle._C_ops.reshape(transpose_155, stack_151) + del stack_151, transpose_155 + + # pd_op.matmul: (-1x144x1024xf32) <- (-1x144x1024xf32, 1024x1024xf32) + matmul_144 = paddle._C_ops.matmul(reshape_353, parameter_11, False, False) + del parameter_11, reshape_353 + + # pd_op.add: (-1x144x1024xf32) <- (-1x144x1024xf32, 1024xf32) + add_177 = paddle._C_ops.add(matmul_144, parameter_10) + del matmul_144, parameter_10 + + # pd_op.reshape: (-1x12x12x1024xf32) <- (-1x144x1024xf32, 4xi64) + reshape_354 = paddle._C_ops.reshape(add_177, full_int_array_43) + del add_177 + + # pd_op.reshape: (-1x1x1x12x12x1024xf32) <- (-1x12x12x1024xf32, 6xi64) + reshape_355 = paddle._C_ops.reshape(reshape_354, full_int_array_45) + del full_int_array_45, reshape_354 + + # pd_op.transpose: (-1x1x12x1x12x1024xf32) <- (-1x1x1x12x12x1024xf32) + transpose_156 = paddle._C_ops.transpose(reshape_355, [0, 1, 3, 2, 4, 5]) + del reshape_355 + + # pd_op.reshape: (-1x12x12x1024xf32) <- (-1x1x12x1x12x1024xf32, 4xi64) + reshape_356 = paddle._C_ops.reshape(transpose_156, full_int_array_43) + del full_int_array_43, transpose_156 + + # pd_op.roll: (-1x12x12x1024xf32) <- (-1x12x12x1024xf32, 2xi64) + roll_23 = paddle._C_ops.roll(reshape_356, full_int_array_29, [1, 2]) + del full_int_array_29, reshape_356 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_155 = [slice_160, full_4, full_27] + del full_27, full_4, slice_160 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_152 = paddle._C_ops.stack(combine_155, 0) + del combine_155 + + # pd_op.reshape: (-1x144x1024xf32) <- (-1x12x12x1024xf32, 3xi64) + reshape_357 = paddle._C_ops.reshape(roll_23, stack_152) + del roll_23, stack_152 + + # pd_op.add: (-1x144x1024xf32) <- (-1x144x1024xf32, -1x144x1024xf32) + add_178 = paddle._C_ops.add(add_173, reshape_357) + del add_173, reshape_357 + + # pd_op.layer_norm: (-1x144x1024xf32, -1x144xf32, -1x144xf32) <- (-1x144x1024xf32, 1024xf32, 1024xf32) + layer_norm_153, layer_norm_154, layer_norm_155 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_178, parameter_9, parameter_8, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_8, parameter_9 + + # pd_op.matmul: (-1x144x4096xf32) <- (-1x144x1024xf32, 1024x4096xf32) + matmul_145 = paddle._C_ops.matmul(layer_norm_153, parameter_7, False, False) + del layer_norm_153, parameter_7 + + # pd_op.add: (-1x144x4096xf32) <- (-1x144x4096xf32, 4096xf32) + add_179 = paddle._C_ops.add(matmul_145, parameter_6) + del matmul_145, parameter_6 + + # pd_op.gelu: (-1x144x4096xf32) <- (-1x144x4096xf32) + gelu_23 = paddle._C_ops.gelu(add_179, False) + del add_179 + + # pd_op.matmul: (-1x144x1024xf32) <- (-1x144x4096xf32, 4096x1024xf32) + matmul_146 = paddle._C_ops.matmul(gelu_23, parameter_5, False, False) + del gelu_23, parameter_5 + + # pd_op.add: (-1x144x1024xf32) <- (-1x144x1024xf32, 1024xf32) + add_180 = paddle._C_ops.add(matmul_146, parameter_4) + del matmul_146, parameter_4 + + # pd_op.add: (-1x144x1024xf32) <- (-1x144x1024xf32, -1x144x1024xf32) + add_181 = paddle._C_ops.add(add_178, add_180) + del add_178, add_180 + + # pd_op.layer_norm: (-1x144x1024xf32, -1x144xf32, -1x144xf32) <- (-1x144x1024xf32, 1024xf32, 1024xf32) + layer_norm_156, layer_norm_157, layer_norm_158 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_181, parameter_3, parameter_2, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_181, parameter_2, parameter_3 + + # pd_op.transpose: (-1x1024x144xf32) <- (-1x144x1024xf32) + transpose_157 = paddle._C_ops.transpose(layer_norm_156, [0, 2, 1]) + del layer_norm_156 + + # pd_op.unsqueeze: (-1x1024x1x144xf32) <- (-1x1024x144xf32, 1xi64) + unsqueeze_72 = paddle._C_ops.unsqueeze(transpose_157, full_int_array_5) + del transpose_157 + + # pd_op.pool2d: (-1x1024x1x1xf32) <- (-1x1024x1x144xf32, 2xi64) + pool2d_0 = paddle._C_ops.pool2d( + unsqueeze_72, + full_int_array_14, + [1, 1], + [0, 0], + False, + True, + "NCHW", + "avg", + False, + True, + "EXPLICIT", + ) + del full_int_array_14, unsqueeze_72 + + # pd_op.squeeze: (-1x1024x1xf32) <- (-1x1024x1x1xf32, 1xi64) + squeeze_0 = paddle._C_ops.squeeze(pool2d_0, full_int_array_5) + del full_int_array_5, pool2d_0 + + # pd_op.flatten: (-1x1024xf32) <- (-1x1024x1xf32) + flatten_1 = paddle._C_ops.flatten(squeeze_0, 1, 2) + del squeeze_0 + + # pd_op.matmul: (-1x102xf32) <- (-1x1024xf32, 1024x102xf32) + matmul_147 = paddle._C_ops.matmul(flatten_1, parameter_1, False, False) + del flatten_1, parameter_1 + + # pd_op.add: (-1x102xf32) <- (-1x102xf32, 102xf32) + add_0 = paddle._C_ops.add(matmul_147, parameter_0) + del matmul_147, parameter_0 + + return ( + add_0, + set_value__0, + set_value__1, + set_value__2, + set_value__3, + set_value__4, + set_value__5, + set_value__6, + set_value__7, + set_value__8, + set_value__9, + set_value__10, + set_value__11, + ) diff --git a/paddle_samples/vision-model/SwinTransformer_base_patch4_window12_384/subgraph_1/weight_meta.py b/paddle_samples/vision-model/SwinTransformer_base_patch4_window12_384/subgraph_1/weight_meta.py new file mode 100644 index 00000000..69d1deec --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_base_patch4_window12_384/subgraph_1/weight_meta.py @@ -0,0 +1,2743 @@ +class Program_weight_tensor_parameter_0: + name = "parameter_0" + shape = [102] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_1: + name = "parameter_1" + shape = [1024, 102] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_2: + name = "parameter_2" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_3: + name = "parameter_3" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_4: + name = "parameter_4" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_5: + name = "parameter_5" + shape = [4096, 1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_6: + name = "parameter_6" + shape = [4096] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_7: + name = "parameter_7" + shape = [1024, 4096] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_8: + name = "parameter_8" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_9: + name = "parameter_9" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_10: + name = "parameter_10" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_11: + name = "parameter_11" + shape = [1024, 1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_12: + name = "parameter_12" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_13: + name = "parameter_13" + shape = [1024, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_14: + name = "parameter_14" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_15: + name = "parameter_15" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_16: + name = "parameter_16" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_17: + name = "parameter_17" + shape = [4096, 1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_18: + name = "parameter_18" + shape = [4096] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_19: + name = "parameter_19" + shape = [1024, 4096] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_20: + name = "parameter_20" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_21: + name = "parameter_21" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_22: + name = "parameter_22" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_23: + name = "parameter_23" + shape = [1024, 1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_24: + name = "parameter_24" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_25: + name = "parameter_25" + shape = [1024, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_26: + name = "parameter_26" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_27: + name = "parameter_27" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_28: + name = "parameter_28" + shape = [2048, 1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_29: + name = "parameter_29" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_30: + name = "parameter_30" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_31: + name = "parameter_31" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_32: + name = "parameter_32" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_33: + name = "parameter_33" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_34: + name = "parameter_34" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_35: + name = "parameter_35" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_36: + name = "parameter_36" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_37: + name = "parameter_37" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_38: + name = "parameter_38" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_39: + name = "parameter_39" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_40: + name = "parameter_40" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_41: + name = "parameter_41" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_42: + name = "parameter_42" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_43: + name = "parameter_43" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_44: + name = "parameter_44" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_45: + name = "parameter_45" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_46: + name = "parameter_46" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_47: + name = "parameter_47" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_48: + name = "parameter_48" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_49: + name = "parameter_49" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_50: + name = "parameter_50" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_51: + name = "parameter_51" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_52: + name = "parameter_52" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_53: + name = "parameter_53" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_54: + name = "parameter_54" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_55: + name = "parameter_55" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_56: + name = "parameter_56" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_57: + name = "parameter_57" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_58: + name = "parameter_58" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_59: + name = "parameter_59" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_60: + name = "parameter_60" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_61: + name = "parameter_61" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_62: + name = "parameter_62" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_63: + name = "parameter_63" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_64: + name = "parameter_64" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_65: + name = "parameter_65" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_66: + name = "parameter_66" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_67: + name = "parameter_67" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_68: + name = "parameter_68" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_69: + name = "parameter_69" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_70: + name = "parameter_70" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_71: + name = "parameter_71" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_72: + name = "parameter_72" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_73: + name = "parameter_73" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_74: + name = "parameter_74" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_75: + name = "parameter_75" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_76: + name = "parameter_76" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_77: + name = "parameter_77" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_78: + name = "parameter_78" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_79: + name = "parameter_79" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_80: + name = "parameter_80" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_81: + name = "parameter_81" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_82: + name = "parameter_82" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_83: + name = "parameter_83" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_84: + name = "parameter_84" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_85: + name = "parameter_85" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_86: + name = "parameter_86" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_87: + name = "parameter_87" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_88: + name = "parameter_88" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_89: + name = "parameter_89" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_90: + name = "parameter_90" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_91: + name = "parameter_91" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_92: + name = "parameter_92" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_93: + name = "parameter_93" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_94: + name = "parameter_94" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_95: + name = "parameter_95" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_96: + name = "parameter_96" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_97: + name = "parameter_97" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_98: + name = "parameter_98" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_99: + name = "parameter_99" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_100: + name = "parameter_100" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_101: + name = "parameter_101" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_102: + name = "parameter_102" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_103: + name = "parameter_103" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_104: + name = "parameter_104" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_105: + name = "parameter_105" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_106: + name = "parameter_106" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_107: + name = "parameter_107" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_108: + name = "parameter_108" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_109: + name = "parameter_109" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_110: + name = "parameter_110" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_111: + name = "parameter_111" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_112: + name = "parameter_112" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_113: + name = "parameter_113" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_114: + name = "parameter_114" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_115: + name = "parameter_115" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_116: + name = "parameter_116" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_117: + name = "parameter_117" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_118: + name = "parameter_118" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_119: + name = "parameter_119" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_120: + name = "parameter_120" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_121: + name = "parameter_121" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_122: + name = "parameter_122" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_123: + name = "parameter_123" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_124: + name = "parameter_124" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_125: + name = "parameter_125" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_126: + name = "parameter_126" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_127: + name = "parameter_127" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_128: + name = "parameter_128" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_129: + name = "parameter_129" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_130: + name = "parameter_130" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_131: + name = "parameter_131" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_132: + name = "parameter_132" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_133: + name = "parameter_133" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_134: + name = "parameter_134" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_135: + name = "parameter_135" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_136: + name = "parameter_136" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_137: + name = "parameter_137" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_138: + name = "parameter_138" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_139: + name = "parameter_139" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_140: + name = "parameter_140" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_141: + name = "parameter_141" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_142: + name = "parameter_142" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_143: + name = "parameter_143" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_144: + name = "parameter_144" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_145: + name = "parameter_145" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_146: + name = "parameter_146" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_147: + name = "parameter_147" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_148: + name = "parameter_148" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_149: + name = "parameter_149" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_150: + name = "parameter_150" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_151: + name = "parameter_151" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_152: + name = "parameter_152" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_153: + name = "parameter_153" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_154: + name = "parameter_154" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_155: + name = "parameter_155" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_156: + name = "parameter_156" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_157: + name = "parameter_157" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_158: + name = "parameter_158" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_159: + name = "parameter_159" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_160: + name = "parameter_160" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_161: + name = "parameter_161" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_162: + name = "parameter_162" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_163: + name = "parameter_163" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_164: + name = "parameter_164" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_165: + name = "parameter_165" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_166: + name = "parameter_166" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_167: + name = "parameter_167" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_168: + name = "parameter_168" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_169: + name = "parameter_169" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_170: + name = "parameter_170" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_171: + name = "parameter_171" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_172: + name = "parameter_172" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_173: + name = "parameter_173" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_174: + name = "parameter_174" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_175: + name = "parameter_175" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_176: + name = "parameter_176" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_177: + name = "parameter_177" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_178: + name = "parameter_178" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_179: + name = "parameter_179" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_180: + name = "parameter_180" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_181: + name = "parameter_181" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_182: + name = "parameter_182" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_183: + name = "parameter_183" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_184: + name = "parameter_184" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_185: + name = "parameter_185" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_186: + name = "parameter_186" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_187: + name = "parameter_187" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_188: + name = "parameter_188" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_189: + name = "parameter_189" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_190: + name = "parameter_190" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_191: + name = "parameter_191" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_192: + name = "parameter_192" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_193: + name = "parameter_193" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_194: + name = "parameter_194" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_195: + name = "parameter_195" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_196: + name = "parameter_196" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_197: + name = "parameter_197" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_198: + name = "parameter_198" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_199: + name = "parameter_199" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_200: + name = "parameter_200" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_201: + name = "parameter_201" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_202: + name = "parameter_202" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_203: + name = "parameter_203" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_204: + name = "parameter_204" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_205: + name = "parameter_205" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_206: + name = "parameter_206" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_207: + name = "parameter_207" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_208: + name = "parameter_208" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_209: + name = "parameter_209" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_210: + name = "parameter_210" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_211: + name = "parameter_211" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_212: + name = "parameter_212" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_213: + name = "parameter_213" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_214: + name = "parameter_214" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_215: + name = "parameter_215" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_216: + name = "parameter_216" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_217: + name = "parameter_217" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_218: + name = "parameter_218" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_219: + name = "parameter_219" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_220: + name = "parameter_220" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_221: + name = "parameter_221" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_222: + name = "parameter_222" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_223: + name = "parameter_223" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_224: + name = "parameter_224" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_225: + name = "parameter_225" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_226: + name = "parameter_226" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_227: + name = "parameter_227" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_228: + name = "parameter_228" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_229: + name = "parameter_229" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_230: + name = "parameter_230" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_231: + name = "parameter_231" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_232: + name = "parameter_232" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_233: + name = "parameter_233" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_234: + name = "parameter_234" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_235: + name = "parameter_235" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_236: + name = "parameter_236" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_237: + name = "parameter_237" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_238: + name = "parameter_238" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_239: + name = "parameter_239" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_240: + name = "parameter_240" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_241: + name = "parameter_241" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_242: + name = "parameter_242" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_243: + name = "parameter_243" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_244: + name = "parameter_244" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_245: + name = "parameter_245" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_246: + name = "parameter_246" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_247: + name = "parameter_247" + shape = [1024, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_248: + name = "parameter_248" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_249: + name = "parameter_249" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_250: + name = "parameter_250" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_251: + name = "parameter_251" + shape = [1024, 256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_252: + name = "parameter_252" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_253: + name = "parameter_253" + shape = [256, 1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_254: + name = "parameter_254" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_255: + name = "parameter_255" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_256: + name = "parameter_256" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_257: + name = "parameter_257" + shape = [256, 256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_258: + name = "parameter_258" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_259: + name = "parameter_259" + shape = [256, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_260: + name = "parameter_260" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_261: + name = "parameter_261" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_262: + name = "parameter_262" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_263: + name = "parameter_263" + shape = [1024, 256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_264: + name = "parameter_264" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_265: + name = "parameter_265" + shape = [256, 1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_266: + name = "parameter_266" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_267: + name = "parameter_267" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_268: + name = "parameter_268" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_269: + name = "parameter_269" + shape = [256, 256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_270: + name = "parameter_270" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_271: + name = "parameter_271" + shape = [256, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_272: + name = "parameter_272" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_273: + name = "parameter_273" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_274: + name = "parameter_274" + shape = [512, 256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_275: + name = "parameter_275" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_276: + name = "parameter_276" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_277: + name = "parameter_277" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_278: + name = "parameter_278" + shape = [512, 128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_279: + name = "parameter_279" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_280: + name = "parameter_280" + shape = [128, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_281: + name = "parameter_281" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_282: + name = "parameter_282" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_283: + name = "parameter_283" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_284: + name = "parameter_284" + shape = [128, 128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_285: + name = "parameter_285" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_286: + name = "parameter_286" + shape = [128, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_287: + name = "parameter_287" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_288: + name = "parameter_288" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_289: + name = "parameter_289" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_290: + name = "parameter_290" + shape = [512, 128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_291: + name = "parameter_291" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_292: + name = "parameter_292" + shape = [128, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_293: + name = "parameter_293" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_294: + name = "parameter_294" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_295: + name = "parameter_295" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_296: + name = "parameter_296" + shape = [128, 128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_297: + name = "parameter_297" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_298: + name = "parameter_298" + shape = [128, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_299: + name = "parameter_299" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_300: + name = "parameter_300" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_301: + name = "parameter_301" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_302: + name = "parameter_302" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_303: + name = "parameter_303" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_304: + name = "parameter_304" + shape = [128, 3, 4, 4] + dtype = "float32" + low = 0 + high = 0.5 + data = None diff --git a/paddle_samples/vision-model/SwinTransformer_base_patch4_window7_224/subgraph_0/graph_hash.txt b/paddle_samples/vision-model/SwinTransformer_base_patch4_window7_224/subgraph_0/graph_hash.txt new file mode 100644 index 00000000..678a18a1 --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_base_patch4_window7_224/subgraph_0/graph_hash.txt @@ -0,0 +1 @@ +b30ca9883649d991bc8c4ba04da935a9b4b73a36613bcc8529ef9a0a2fb8b1d3 \ No newline at end of file diff --git a/paddle_samples/vision-model/SwinTransformer_base_patch4_window7_224/subgraph_0/graph_net.json b/paddle_samples/vision-model/SwinTransformer_base_patch4_window7_224/subgraph_0/graph_net.json new file mode 100644 index 00000000..368ac6fc --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_base_patch4_window7_224/subgraph_0/graph_net.json @@ -0,0 +1,5 @@ +{ + "framework": "paddle", + "num_devices_required": 1, + "num_nodes_required": 1 +} \ No newline at end of file diff --git a/paddle_samples/vision-model/SwinTransformer_base_patch4_window7_224/subgraph_0/input_meta.py b/paddle_samples/vision-model/SwinTransformer_base_patch4_window7_224/subgraph_0/input_meta.py new file mode 100644 index 00000000..f78cee85 --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_base_patch4_window7_224/subgraph_0/input_meta.py @@ -0,0 +1,439 @@ +class Program_weight_tensor_data_0: + name = "data_0" + shape = [12, 3, 224, 224] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_1: + name = "data_1" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_2: + name = "data_2" + shape = [169, 4] + dtype = "float32" + low = -8.34933 + high = 5.14802 + data = None + + +class Program_weight_tensor_data_3: + name = "data_3" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_4: + name = "data_4" + shape = [169, 4] + dtype = "float32" + low = -6.26847 + high = 5.23487 + data = None + + +class Program_weight_tensor_data_5: + name = "data_5" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_6: + name = "data_6" + shape = [169, 8] + dtype = "float32" + low = -6.01911 + high = 7.4613 + data = None + + +class Program_weight_tensor_data_7: + name = "data_7" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_8: + name = "data_8" + shape = [169, 8] + dtype = "float32" + low = -7.64864 + high = 5.64592 + data = None + + +class Program_weight_tensor_data_9: + name = "data_9" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_10: + name = "data_10" + shape = [169, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_11: + name = "data_11" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_12: + name = "data_12" + shape = [169, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_13: + name = "data_13" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_14: + name = "data_14" + shape = [169, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_15: + name = "data_15" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_16: + name = "data_16" + shape = [169, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_17: + name = "data_17" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_18: + name = "data_18" + shape = [169, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_19: + name = "data_19" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_20: + name = "data_20" + shape = [169, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_21: + name = "data_21" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_22: + name = "data_22" + shape = [169, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_23: + name = "data_23" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_24: + name = "data_24" + shape = [169, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_25: + name = "data_25" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_26: + name = "data_26" + shape = [169, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_27: + name = "data_27" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_28: + name = "data_28" + shape = [169, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_29: + name = "data_29" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_30: + name = "data_30" + shape = [169, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_31: + name = "data_31" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_32: + name = "data_32" + shape = [169, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_33: + name = "data_33" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_34: + name = "data_34" + shape = [169, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_35: + name = "data_35" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_36: + name = "data_36" + shape = [169, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_37: + name = "data_37" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_38: + name = "data_38" + shape = [169, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_39: + name = "data_39" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_40: + name = "data_40" + shape = [169, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_41: + name = "data_41" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_42: + name = "data_42" + shape = [169, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_43: + name = "data_43" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_44: + name = "data_44" + shape = [169, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_45: + name = "data_45" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_46: + name = "data_46" + shape = [169, 32] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_47: + name = "data_47" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_48: + name = "data_48" + shape = [169, 32] + dtype = "float32" + low = 0 + high = 0.5 + data = None diff --git a/paddle_samples/vision-model/SwinTransformer_base_patch4_window7_224/subgraph_0/model.py b/paddle_samples/vision-model/SwinTransformer_base_patch4_window7_224/subgraph_0/model.py new file mode 100644 index 00000000..97edeac7 --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_base_patch4_window7_224/subgraph_0/model.py @@ -0,0 +1,13771 @@ +import paddle + + +class GraphModule(paddle.nn.Layer): + def __init__(self): + super().__init__() + + def forward( + self, + parameter_0, + parameter_1, + parameter_2, + parameter_3, + parameter_4, + parameter_5, + parameter_6, + parameter_7, + parameter_8, + parameter_9, + parameter_10, + parameter_11, + parameter_12, + parameter_13, + parameter_14, + parameter_15, + parameter_16, + parameter_17, + parameter_18, + parameter_19, + parameter_20, + parameter_21, + parameter_22, + parameter_23, + parameter_24, + parameter_25, + parameter_26, + parameter_27, + parameter_28, + parameter_29, + parameter_30, + parameter_31, + parameter_32, + parameter_33, + parameter_34, + parameter_35, + parameter_36, + parameter_37, + parameter_38, + parameter_39, + parameter_40, + parameter_41, + parameter_42, + parameter_43, + parameter_44, + parameter_45, + parameter_46, + parameter_47, + parameter_48, + parameter_49, + parameter_50, + parameter_51, + parameter_52, + parameter_53, + parameter_54, + parameter_55, + parameter_56, + parameter_57, + parameter_58, + parameter_59, + parameter_60, + parameter_61, + parameter_62, + parameter_63, + parameter_64, + parameter_65, + parameter_66, + parameter_67, + parameter_68, + parameter_69, + parameter_70, + parameter_71, + parameter_72, + parameter_73, + parameter_74, + parameter_75, + parameter_76, + parameter_77, + parameter_78, + parameter_79, + parameter_80, + parameter_81, + parameter_82, + parameter_83, + parameter_84, + parameter_85, + parameter_86, + parameter_87, + parameter_88, + parameter_89, + parameter_90, + parameter_91, + parameter_92, + parameter_93, + parameter_94, + parameter_95, + parameter_96, + parameter_97, + parameter_98, + parameter_99, + parameter_100, + parameter_101, + parameter_102, + parameter_103, + parameter_104, + parameter_105, + parameter_106, + parameter_107, + parameter_108, + parameter_109, + parameter_110, + parameter_111, + parameter_112, + parameter_113, + parameter_114, + parameter_115, + parameter_116, + parameter_117, + parameter_118, + parameter_119, + parameter_120, + parameter_121, + parameter_122, + parameter_123, + parameter_124, + parameter_125, + parameter_126, + parameter_127, + parameter_128, + parameter_129, + parameter_130, + parameter_131, + parameter_132, + parameter_133, + parameter_134, + parameter_135, + parameter_136, + parameter_137, + parameter_138, + parameter_139, + parameter_140, + parameter_141, + parameter_142, + parameter_143, + parameter_144, + parameter_145, + parameter_146, + parameter_147, + parameter_148, + parameter_149, + parameter_150, + parameter_151, + parameter_152, + parameter_153, + parameter_154, + parameter_155, + parameter_156, + parameter_157, + parameter_158, + parameter_159, + parameter_160, + parameter_161, + parameter_162, + parameter_163, + parameter_164, + parameter_165, + parameter_166, + parameter_167, + parameter_168, + parameter_169, + parameter_170, + parameter_171, + parameter_172, + parameter_173, + parameter_174, + parameter_175, + parameter_176, + parameter_177, + parameter_178, + parameter_179, + parameter_180, + parameter_181, + parameter_182, + parameter_183, + parameter_184, + parameter_185, + parameter_186, + parameter_187, + parameter_188, + parameter_189, + parameter_190, + parameter_191, + parameter_192, + parameter_193, + parameter_194, + parameter_195, + parameter_196, + parameter_197, + parameter_198, + parameter_199, + parameter_200, + parameter_201, + parameter_202, + parameter_203, + parameter_204, + parameter_205, + parameter_206, + parameter_207, + parameter_208, + parameter_209, + parameter_210, + parameter_211, + parameter_212, + parameter_213, + parameter_214, + parameter_215, + parameter_216, + parameter_217, + parameter_218, + parameter_219, + parameter_220, + parameter_221, + parameter_222, + parameter_223, + parameter_224, + parameter_225, + parameter_226, + parameter_227, + parameter_228, + parameter_229, + parameter_230, + parameter_231, + parameter_232, + parameter_233, + parameter_234, + parameter_235, + parameter_236, + parameter_237, + parameter_238, + parameter_239, + parameter_240, + parameter_241, + parameter_242, + parameter_243, + parameter_244, + parameter_245, + parameter_246, + parameter_247, + parameter_248, + parameter_249, + parameter_250, + parameter_251, + parameter_252, + parameter_253, + parameter_254, + parameter_255, + parameter_256, + parameter_257, + parameter_258, + parameter_259, + parameter_260, + parameter_261, + parameter_262, + parameter_263, + parameter_264, + parameter_265, + parameter_266, + parameter_267, + parameter_268, + parameter_269, + parameter_270, + parameter_271, + parameter_272, + parameter_273, + parameter_274, + parameter_275, + parameter_276, + parameter_277, + parameter_278, + parameter_279, + parameter_280, + parameter_281, + parameter_282, + parameter_283, + parameter_284, + parameter_285, + parameter_286, + parameter_287, + parameter_288, + parameter_289, + parameter_290, + parameter_291, + parameter_292, + parameter_293, + parameter_294, + parameter_295, + parameter_296, + parameter_297, + parameter_298, + parameter_299, + parameter_300, + parameter_301, + parameter_302, + parameter_303, + parameter_304, + data_0, + data_1, + data_2, + data_3, + data_4, + data_5, + data_6, + data_7, + data_8, + data_9, + data_10, + data_11, + data_12, + data_13, + data_14, + data_15, + data_16, + data_17, + data_18, + data_19, + data_20, + data_21, + data_22, + data_23, + data_24, + data_25, + data_26, + data_27, + data_28, + data_29, + data_30, + data_31, + data_32, + data_33, + data_34, + data_35, + data_36, + data_37, + data_38, + data_39, + data_40, + data_41, + data_42, + data_43, + data_44, + data_45, + data_46, + data_47, + data_48, + ): + # pd_op.shape64: (4xi64) <- (-1x3x224x224xf32) + shape64_0 = paddle._C_ops.shape64(data_0) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_7 = [0] + + # pd_op.assign: (1xi64) <- (1xi64) + assign_266 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_259 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_256 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_249 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_233 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_226 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_223 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_216 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_213 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_206 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_203 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_196 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_193 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_186 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_183 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_176 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_173 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_166 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_163 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_156 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_153 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_146 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_143 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_136 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_133 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_126 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_123 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_116 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_113 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_106 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_103 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_96 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_93 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_86 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_83 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_76 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_73 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_66 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_63 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_56 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_40 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_33 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_30 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_23 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_12 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_5 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_4 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_0 = full_int_array_7 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_8 = [1] + + # pd_op.assign: (1xi64) <- (1xi64) + assign_261 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_260 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_251 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_250 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_228 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_227 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_218 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_217 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_208 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_207 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_198 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_197 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_188 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_187 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_178 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_177 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_168 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_167 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_158 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_157 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_148 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_147 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_138 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_137 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_128 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_127 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_118 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_117 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_108 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_107 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_98 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_97 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_88 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_87 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_78 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_77 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_68 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_67 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_58 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_57 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_35 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_34 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_25 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_24 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_7 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_6 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_2 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_1 = full_int_array_8 + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_24 = paddle._C_ops.slice( + shape64_0, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_0 + + # pd_op.conv2d: (-1x128x56x56xf32) <- (-1x3x224x224xf32, 128x3x4x4xf32) + conv2d_0 = paddle._C_ops.conv2d( + data_0, parameter_304, [4, 4], [0, 0], "EXPLICIT", [1, 1], 1, "NCHW" + ) + del data_0, parameter_304 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_9 = [1, -1, 1, 1] + + # pd_op.reshape: (1x128x1x1xf32) <- (128xf32, 4xi64) + reshape_0 = paddle._C_ops.reshape(parameter_303, full_int_array_9) + del full_int_array_9, parameter_303 + + # pd_op.add: (-1x128x56x56xf32) <- (-1x128x56x56xf32, 1x128x1x1xf32) + add_0 = paddle._C_ops.add(conv2d_0, reshape_0) + + # pd_op.shape64: (4xi64) <- (-1x128x56x56xf32) + shape64_1 = paddle._C_ops.shape64(add_0) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_25 = paddle._C_ops.slice( + shape64_1, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_1 + + # pd_op.flatten: (-1x128x3136xf32) <- (-1x128x56x56xf32) + flatten_1 = paddle._C_ops.flatten(add_0, 2, 3) + + # pd_op.transpose: (-1x3136x128xf32) <- (-1x128x3136xf32) + transpose_0 = paddle._C_ops.transpose(flatten_1, [0, 2, 1]) + del flatten_1 + + # pd_op.layer_norm: (-1x3136x128xf32, -1x3136xf32, -1x3136xf32) <- (-1x3136x128xf32, 128xf32, 128xf32) + layer_norm_0, layer_norm_1, layer_norm_2 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + transpose_0, parameter_302, parameter_301, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_301, parameter_302 + + # pd_op.shape64: (3xi64) <- (-1x3136x128xf32) + shape64_2 = paddle._C_ops.shape64(layer_norm_0) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_26 = paddle._C_ops.slice( + shape64_2, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_2 + + # pd_op.layer_norm: (-1x3136x128xf32, -1x3136xf32, -1x3136xf32) <- (-1x3136x128xf32, 128xf32, 128xf32) + layer_norm_3, layer_norm_4, layer_norm_5 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + layer_norm_0, parameter_300, parameter_299, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_299, parameter_300 + + # pd_op.full: (xi64) <- () + full_25 = paddle._C_ops.full( + [], float("56"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_26 = paddle._C_ops.full( + [], float("128"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_0 = [slice_26, full_25, full_25, full_26] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_0 = paddle._C_ops.stack(combine_0, 0) + del combine_0 + + # pd_op.reshape: (-1x56x56x128xf32) <- (-1x3136x128xf32, 4xi64) + reshape_1 = paddle._C_ops.reshape(layer_norm_3, stack_0) + del stack_0 + + # pd_op.shape64: (4xi64) <- (-1x56x56x128xf32) + shape64_3 = paddle._C_ops.shape64(reshape_1) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_27 = paddle._C_ops.slice( + shape64_3, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_3 + + # pd_op.full: (xi64) <- () + full_27 = paddle._C_ops.full( + [], float("8"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_28 = paddle._C_ops.full( + [], float("7"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_1 = [slice_27, full_27, full_28, full_27, full_28, full_26] + del slice_27 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_1 = paddle._C_ops.stack(combine_1, 0) + del combine_1 + + # pd_op.reshape: (-1x8x7x8x7x128xf32) <- (-1x56x56x128xf32, 6xi64) + reshape_211 = paddle._C_ops.reshape(reshape_1, stack_1) + del stack_1 + + # pd_op.transpose: (-1x8x8x7x7x128xf32) <- (-1x8x7x8x7x128xf32) + transpose_1 = paddle._C_ops.transpose(reshape_211, [0, 1, 3, 2, 4, 5]) + del reshape_211 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_10 = [-1, 7, 7, 128] + + # pd_op.reshape: (-1x7x7x128xf32) <- (-1x8x8x7x7x128xf32, 4xi64) + reshape_2 = paddle._C_ops.reshape(transpose_1, full_int_array_10) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_11 = [-1, 49, 128] + + # pd_op.reshape: (-1x49x128xf32) <- (-1x7x7x128xf32, 3xi64) + reshape_3 = paddle._C_ops.reshape(reshape_2, full_int_array_11) + + # pd_op.shape64: (3xi64) <- (-1x49x128xf32) + shape64_4 = paddle._C_ops.shape64(reshape_3) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_28 = paddle._C_ops.slice( + shape64_4, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_4 + + # pd_op.matmul: (-1x49x384xf32) <- (-1x49x128xf32, 128x384xf32) + matmul_0 = paddle._C_ops.matmul(reshape_3, parameter_298, False, False) + del parameter_298 + + # pd_op.add: (-1x49x384xf32) <- (-1x49x384xf32, 384xf32) + add_1 = paddle._C_ops.add(matmul_0, parameter_297) + del parameter_297 + + # pd_op.full: (xi64) <- () + full_29 = paddle._C_ops.full( + [], float("49"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_30 = paddle._C_ops.full( + [], float("3"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_31 = paddle._C_ops.full( + [], float("4"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_32 = paddle._C_ops.full( + [], float("32"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_2 = [slice_28, full_29, full_30, full_31, full_32] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_2 = paddle._C_ops.stack(combine_2, 0) + del combine_2 + + # pd_op.reshape: (-1x49x3x4x32xf32) <- (-1x49x384xf32, 5xi64) + reshape_212 = paddle._C_ops.reshape(add_1, stack_2) + del stack_2 + + # pd_op.transpose: (3x-1x4x49x32xf32) <- (-1x49x3x4x32xf32) + transpose_2 = paddle._C_ops.transpose(reshape_212, [2, 0, 3, 1, 4]) + del reshape_212 + + # pd_op.slice: (-1x4x49x32xf32) <- (3x-1x4x49x32xf32, 1xi64, 1xi64) + slice_29 = paddle._C_ops.slice( + transpose_2, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_0 = [2] + + # pd_op.assign: (1xi64) <- (1xi64) + assign_271 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_269 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_263 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_262 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_253 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_252 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_230 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_229 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_220 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_219 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_210 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_209 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_200 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_199 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_190 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_189 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_180 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_179 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_170 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_169 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_160 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_159 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_150 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_149 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_140 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_139 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_130 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_129 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_120 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_119 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_110 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_109 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_100 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_99 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_90 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_89 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_80 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_79 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_70 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_69 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_60 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_59 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_37 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_36 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_27 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_26 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_9 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_8 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_3 = full_int_array_0 + + # pd_op.slice: (-1x4x49x32xf32) <- (3x-1x4x49x32xf32, 1xi64, 1xi64) + slice_30 = paddle._C_ops.slice( + transpose_2, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_1 = [3] + + # pd_op.assign: (1xi64) <- (1xi64) + assign_264 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_254 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_231 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_221 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_211 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_201 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_191 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_181 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_171 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_161 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_151 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_141 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_131 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_121 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_111 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_101 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_91 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_81 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_71 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_61 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_38 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_28 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_10 = full_int_array_1 + + # pd_op.slice: (-1x4x49x32xf32) <- (3x-1x4x49x32xf32, 1xi64, 1xi64) + slice_0 = paddle._C_ops.slice( + transpose_2, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.full: (1xf32) <- () + full_0 = paddle._C_ops.full( + [1], float("0.176777"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.assign: (1xf32) <- (1xf32) + assign_265 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_255 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_232 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_222 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_212 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_202 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_192 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_182 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_172 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_162 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_152 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_142 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_132 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_122 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_112 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_102 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_92 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_82 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_72 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_62 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_39 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_29 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_11 = full_0 + + # pd_op.scale: (-1x4x49x32xf32) <- (-1x4x49x32xf32, 1xf32) + scale_0 = paddle._C_ops.scale(slice_29, full_0, float("0"), True) + del slice_29 + + # pd_op.transpose: (-1x4x32x49xf32) <- (-1x4x49x32xf32) + transpose_3 = paddle._C_ops.transpose(slice_30, [0, 1, 3, 2]) + del slice_30 + + # pd_op.matmul: (-1x4x49x49xf32) <- (-1x4x49x32xf32, -1x4x32x49xf32) + matmul_1 = paddle._C_ops.matmul(scale_0, transpose_3, False, False) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_12 = [-1] + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_4 = paddle._C_ops.reshape(data_1, full_int_array_12) + del data_1 + + # pd_op.index_select: (2401x4xf32) <- (169x4xf32, 2401xi64) + index_select_0 = paddle._C_ops.index_select(data_2, reshape_4, 0) + del data_2 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_13 = [49, 49, -1] + + # pd_op.reshape: (49x49x4xf32) <- (2401x4xf32, 3xi64) + reshape_213 = paddle._C_ops.reshape(index_select_0, full_int_array_13) + + # pd_op.transpose: (4x49x49xf32) <- (49x49x4xf32) + transpose_4 = paddle._C_ops.transpose(reshape_213, [2, 0, 1]) + del reshape_213 + + # pd_op.unsqueeze: (1x4x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_0 = paddle._C_ops.unsqueeze(transpose_4, full_int_array_7) + + # pd_op.add: (-1x4x49x49xf32) <- (-1x4x49x49xf32, 1x4x49x49xf32) + add_170 = paddle._C_ops.add(matmul_1, unsqueeze_0) + + # pd_op.softmax: (-1x4x49x49xf32) <- (-1x4x49x49xf32) + softmax_0 = paddle._C_ops.softmax(add_170, -1) + del add_170 + + # pd_op.matmul: (-1x4x49x32xf32) <- (-1x4x49x49xf32, -1x4x49x32xf32) + matmul_124 = paddle._C_ops.matmul(softmax_0, slice_0, False, False) + + # pd_op.transpose: (-1x49x4x32xf32) <- (-1x4x49x32xf32) + transpose_5 = paddle._C_ops.transpose(matmul_124, [0, 2, 1, 3]) + del matmul_124 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_3 = [slice_28, full_29, full_26] + del slice_28 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_3 = paddle._C_ops.stack(combine_3, 0) + del combine_3 + + # pd_op.reshape: (-1x49x128xf32) <- (-1x49x4x32xf32, 3xi64) + reshape_5 = paddle._C_ops.reshape(transpose_5, stack_3) + del stack_3 + + # pd_op.matmul: (-1x49x128xf32) <- (-1x49x128xf32, 128x128xf32) + matmul_2 = paddle._C_ops.matmul(reshape_5, parameter_296, False, False) + del parameter_296 + + # pd_op.add: (-1x49x128xf32) <- (-1x49x128xf32, 128xf32) + add_2 = paddle._C_ops.add(matmul_2, parameter_295) + del parameter_295 + + # pd_op.reshape: (-1x7x7x128xf32) <- (-1x49x128xf32, 4xi64) + reshape_6 = paddle._C_ops.reshape(add_2, full_int_array_10) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_14 = [-1, 8, 8, 7, 7, 128] + + # pd_op.reshape: (-1x8x8x7x7x128xf32) <- (-1x7x7x128xf32, 6xi64) + reshape_214 = paddle._C_ops.reshape(reshape_6, full_int_array_14) + + # pd_op.transpose: (-1x8x7x8x7x128xf32) <- (-1x8x8x7x7x128xf32) + transpose_6 = paddle._C_ops.transpose(reshape_214, [0, 1, 3, 2, 4, 5]) + del reshape_214 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_15 = [-1, 56, 56, 128] + + # pd_op.reshape: (-1x56x56x128xf32) <- (-1x8x7x8x7x128xf32, 4xi64) + reshape_7 = paddle._C_ops.reshape(transpose_6, full_int_array_15) + + # pd_op.full: (xi64) <- () + full_33 = paddle._C_ops.full( + [], float("3136"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_4 = [slice_26, full_33, full_26] + del slice_26 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_4 = paddle._C_ops.stack(combine_4, 0) + del combine_4 + + # pd_op.reshape: (-1x3136x128xf32) <- (-1x56x56x128xf32, 3xi64) + reshape_8 = paddle._C_ops.reshape(reshape_7, stack_4) + del stack_4 + + # pd_op.add: (-1x3136x128xf32) <- (-1x3136x128xf32, -1x3136x128xf32) + add_3 = paddle._C_ops.add(layer_norm_0, reshape_8) + + # pd_op.layer_norm: (-1x3136x128xf32, -1x3136xf32, -1x3136xf32) <- (-1x3136x128xf32, 128xf32, 128xf32) + layer_norm_6, layer_norm_7, layer_norm_8 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_3, parameter_294, parameter_293, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_293, parameter_294 + + # pd_op.matmul: (-1x3136x512xf32) <- (-1x3136x128xf32, 128x512xf32) + matmul_3 = paddle._C_ops.matmul(layer_norm_6, parameter_292, False, False) + del parameter_292 + + # pd_op.add: (-1x3136x512xf32) <- (-1x3136x512xf32, 512xf32) + add_4 = paddle._C_ops.add(matmul_3, parameter_291) + del parameter_291 + + # pd_op.gelu: (-1x3136x512xf32) <- (-1x3136x512xf32) + gelu_0 = paddle._C_ops.gelu(add_4, False) + + # pd_op.matmul: (-1x3136x128xf32) <- (-1x3136x512xf32, 512x128xf32) + matmul_4 = paddle._C_ops.matmul(gelu_0, parameter_290, False, False) + del parameter_290 + + # pd_op.add: (-1x3136x128xf32) <- (-1x3136x128xf32, 128xf32) + add_5 = paddle._C_ops.add(matmul_4, parameter_289) + del parameter_289 + + # pd_op.add: (-1x3136x128xf32) <- (-1x3136x128xf32, -1x3136x128xf32) + add_6 = paddle._C_ops.add(add_3, add_5) + + # pd_op.shape64: (3xi64) <- (-1x3136x128xf32) + shape64_5 = paddle._C_ops.shape64(add_6) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_31 = paddle._C_ops.slice( + shape64_5, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_5 + + # pd_op.layer_norm: (-1x3136x128xf32, -1x3136xf32, -1x3136xf32) <- (-1x3136x128xf32, 128xf32, 128xf32) + layer_norm_9, layer_norm_10, layer_norm_11 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_6, parameter_288, parameter_287, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_287, parameter_288 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_5 = [slice_31, full_25, full_25, full_26] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_5 = paddle._C_ops.stack(combine_5, 0) + del combine_5 + + # pd_op.reshape: (-1x56x56x128xf32) <- (-1x3136x128xf32, 4xi64) + reshape_9 = paddle._C_ops.reshape(layer_norm_9, stack_5) + del stack_5 + + # pd_op.shape64: (4xi64) <- (-1x56x56x128xf32) + shape64_6 = paddle._C_ops.shape64(reshape_9) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_32 = paddle._C_ops.slice( + shape64_6, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_6 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_2 = [-3, -3] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_258 = full_int_array_2 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_225 = full_int_array_2 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_205 = full_int_array_2 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_185 = full_int_array_2 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_165 = full_int_array_2 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_145 = full_int_array_2 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_125 = full_int_array_2 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_105 = full_int_array_2 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_85 = full_int_array_2 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_65 = full_int_array_2 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_32 = full_int_array_2 + + # pd_op.roll: (-1x56x56x128xf32) <- (-1x56x56x128xf32, 2xi64) + roll_0 = paddle._C_ops.roll(reshape_9, full_int_array_2, [1, 2]) + + # pd_op.shape64: (4xi64) <- (-1x56x56x128xf32) + shape64_7 = paddle._C_ops.shape64(roll_0) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_33 = paddle._C_ops.slice( + shape64_7, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_7 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_6 = [slice_33, full_27, full_28, full_27, full_28, full_26] + del slice_33 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_6 = paddle._C_ops.stack(combine_6, 0) + del combine_6 + + # pd_op.reshape: (-1x8x7x8x7x128xf32) <- (-1x56x56x128xf32, 6xi64) + reshape_215 = paddle._C_ops.reshape(roll_0, stack_6) + del stack_6 + + # pd_op.transpose: (-1x8x8x7x7x128xf32) <- (-1x8x7x8x7x128xf32) + transpose_7 = paddle._C_ops.transpose(reshape_215, [0, 1, 3, 2, 4, 5]) + del reshape_215 + + # pd_op.reshape: (-1x7x7x128xf32) <- (-1x8x8x7x7x128xf32, 4xi64) + reshape_10 = paddle._C_ops.reshape(transpose_7, full_int_array_10) + + # pd_op.reshape: (-1x49x128xf32) <- (-1x7x7x128xf32, 3xi64) + reshape_11 = paddle._C_ops.reshape(reshape_10, full_int_array_11) + del full_int_array_11 + + # pd_op.full: (1x56x56x1xf32) <- () + full_34 = paddle._C_ops.full( + [1, 56, 56, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_16 = [0, 0] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_236 = full_int_array_16 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_43 = full_int_array_16 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_14 = full_int_array_16 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_17 = [-7, -7] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_18 = [1, 1] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_270 = full_int_array_18 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_245 = full_int_array_18 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_52 = full_int_array_18 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_20 = full_int_array_18 + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__12 = paddle._C_ops.set_value_( + full_34, + full_int_array_16, + full_int_array_17, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_34 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_19 = [0, -7] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_20 = [-7, -3] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__13 = paddle._C_ops.set_value_( + set_value__12, + full_int_array_19, + full_int_array_20, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__12 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_21 = [0, -3] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_22 = [-7, 2147483647] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__14 = paddle._C_ops.set_value_( + set_value__13, + full_int_array_21, + full_int_array_22, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__13 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_23 = [-7, 0] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_24 = [-3, -7] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__15 = paddle._C_ops.set_value_( + set_value__14, + full_int_array_23, + full_int_array_24, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__14 + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__16 = paddle._C_ops.set_value_( + set_value__15, + full_int_array_17, + full_int_array_2, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__15 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_25 = [-3, 2147483647] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__17 = paddle._C_ops.set_value_( + set_value__16, + full_int_array_20, + full_int_array_25, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__16 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_26 = [-3, 0] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_27 = [2147483647, -7] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__18 = paddle._C_ops.set_value_( + set_value__17, + full_int_array_26, + full_int_array_27, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__17 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_28 = [2147483647, -3] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__19 = paddle._C_ops.set_value_( + set_value__18, + full_int_array_24, + full_int_array_28, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__18 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_29 = [2147483647, 2147483647] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_246 = full_int_array_29 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_243 = full_int_array_29 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_240 = full_int_array_29 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_237 = full_int_array_29 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_53 = full_int_array_29 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_50 = full_int_array_29 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_47 = full_int_array_29 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_44 = full_int_array_29 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_21 = full_int_array_29 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_18 = full_int_array_29 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_16 = full_int_array_29 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_15 = full_int_array_29 + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__0 = paddle._C_ops.set_value_( + set_value__19, + full_int_array_2, + full_int_array_29, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__19 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_30 = [1, 8, 7, 8, 7, 1] + + # pd_op.reshape: (1x8x7x8x7x1xf32) <- (1x56x56x1xf32, 6xi64) + reshape_216 = paddle._C_ops.reshape(set_value__0, full_int_array_30) + del full_int_array_30 + + # pd_op.transpose: (1x8x8x7x7x1xf32) <- (1x8x7x8x7x1xf32) + transpose_146 = paddle._C_ops.transpose(reshape_216, [0, 1, 3, 2, 4, 5]) + del reshape_216 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_31 = [-1, 7, 7, 1] + + # pd_op.reshape: (64x7x7x1xf32) <- (1x8x8x7x7x1xf32, 4xi64) + reshape_217 = paddle._C_ops.reshape(transpose_146, full_int_array_31) + del transpose_146 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_32 = [-1, 49] + + # pd_op.reshape: (64x49xf32) <- (64x7x7x1xf32, 2xi64) + reshape_218 = paddle._C_ops.reshape(reshape_217, full_int_array_32) + del reshape_217 + + # pd_op.unsqueeze: (64x1x49xf32) <- (64x49xf32, 1xi64) + unsqueeze_37 = paddle._C_ops.unsqueeze(reshape_218, full_int_array_8) + + # pd_op.unsqueeze: (64x49x1xf32) <- (64x49xf32, 1xi64) + unsqueeze_38 = paddle._C_ops.unsqueeze(reshape_218, full_int_array_0) + del reshape_218 + + # pd_op.subtract: (64x49x49xf32) <- (64x1x49xf32, 64x49x1xf32) + subtract_0 = paddle._C_ops.subtract(unsqueeze_37, unsqueeze_38) + del unsqueeze_37, unsqueeze_38 + + # pd_op.full: (xf32) <- () + full_35 = paddle._C_ops.full( + [], float("0"), paddle.float32, paddle.framework._current_expected_place() + ) + + # pd_op.not_equal: (64x49x49xb) <- (64x49x49xf32, xf32) + not_equal_0 = paddle._C_ops.not_equal(subtract_0, full_35) + + # pd_op.full: (64x49x49xf32) <- () + full_36 = paddle._C_ops.full( + [64, 49, 49], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (64x49x49xf32) <- (64x49x49xb, 64x49x49xf32, 64x49x49xf32) + where_0 = paddle._C_ops.where(not_equal_0, full_36, subtract_0) + del full_36, not_equal_0, subtract_0 + + # pd_op.equal: (64x49x49xb) <- (64x49x49xf32, xf32) + equal_0 = paddle._C_ops.equal(where_0, full_35) + + # pd_op.full: (64x49x49xf32) <- () + full_37 = paddle._C_ops.full( + [64, 49, 49], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (64x49x49xf32) <- (64x49x49xb, 64x49x49xf32, 64x49x49xf32) + where_1 = paddle._C_ops.where(equal_0, full_37, where_0) + del equal_0, full_37, where_0 + + # pd_op.shape64: (3xi64) <- (-1x49x128xf32) + shape64_8 = paddle._C_ops.shape64(reshape_11) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_34 = paddle._C_ops.slice( + shape64_8, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_8 + + # pd_op.matmul: (-1x49x384xf32) <- (-1x49x128xf32, 128x384xf32) + matmul_5 = paddle._C_ops.matmul(reshape_11, parameter_286, False, False) + del parameter_286 + + # pd_op.add: (-1x49x384xf32) <- (-1x49x384xf32, 384xf32) + add_7 = paddle._C_ops.add(matmul_5, parameter_285) + del parameter_285 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_7 = [slice_34, full_29, full_30, full_31, full_32] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_7 = paddle._C_ops.stack(combine_7, 0) + del combine_7 + + # pd_op.reshape: (-1x49x3x4x32xf32) <- (-1x49x384xf32, 5xi64) + reshape_219 = paddle._C_ops.reshape(add_7, stack_7) + del stack_7 + + # pd_op.transpose: (3x-1x4x49x32xf32) <- (-1x49x3x4x32xf32) + transpose_8 = paddle._C_ops.transpose(reshape_219, [2, 0, 3, 1, 4]) + del reshape_219 + + # pd_op.slice: (-1x4x49x32xf32) <- (3x-1x4x49x32xf32, 1xi64, 1xi64) + slice_35 = paddle._C_ops.slice( + transpose_8, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x4x49x32xf32) <- (3x-1x4x49x32xf32, 1xi64, 1xi64) + slice_36 = paddle._C_ops.slice( + transpose_8, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x4x49x32xf32) <- (3x-1x4x49x32xf32, 1xi64, 1xi64) + slice_1 = paddle._C_ops.slice( + transpose_8, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x4x49x32xf32) <- (-1x4x49x32xf32, 1xf32) + scale_1 = paddle._C_ops.scale(slice_35, full_0, float("0"), True) + del slice_35 + + # pd_op.transpose: (-1x4x32x49xf32) <- (-1x4x49x32xf32) + transpose_9 = paddle._C_ops.transpose(slice_36, [0, 1, 3, 2]) + del slice_36 + + # pd_op.matmul: (-1x4x49x49xf32) <- (-1x4x49x32xf32, -1x4x32x49xf32) + matmul_6 = paddle._C_ops.matmul(scale_1, transpose_9, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_12 = paddle._C_ops.reshape(data_3, full_int_array_12) + del data_3 + + # pd_op.index_select: (2401x4xf32) <- (169x4xf32, 2401xi64) + index_select_1 = paddle._C_ops.index_select(data_4, reshape_12, 0) + del data_4 + + # pd_op.reshape: (49x49x4xf32) <- (2401x4xf32, 3xi64) + reshape_220 = paddle._C_ops.reshape(index_select_1, full_int_array_13) + + # pd_op.transpose: (4x49x49xf32) <- (49x49x4xf32) + transpose_10 = paddle._C_ops.transpose(reshape_220, [2, 0, 1]) + del reshape_220 + + # pd_op.unsqueeze: (1x4x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_1 = paddle._C_ops.unsqueeze(transpose_10, full_int_array_7) + + # pd_op.add: (-1x4x49x49xf32) <- (-1x4x49x49xf32, 1x4x49x49xf32) + add_8 = paddle._C_ops.add(matmul_6, unsqueeze_1) + + # pd_op.full: (xi64) <- () + full_38 = paddle._C_ops.full( + [], float("64"), paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_0 = paddle._C_ops.floor_divide(slice_34, full_38) + del full_38 + + # pd_op.full: (xi64) <- () + full_39 = paddle._C_ops.full( + [], float("64"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_8 = [floor_divide_0, full_39, full_31, full_29, full_29] + del floor_divide_0, full_39 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_8 = paddle._C_ops.stack(combine_8, 0) + del combine_8 + + # pd_op.reshape: (-1x64x4x49x49xf32) <- (-1x4x49x49xf32, 5xi64) + reshape_13 = paddle._C_ops.reshape(add_8, stack_8) + del stack_8 + + # pd_op.unsqueeze: (64x1x49x49xf32) <- (64x49x49xf32, 1xi64) + unsqueeze_39 = paddle._C_ops.unsqueeze(where_1, full_int_array_8) + del where_1 + + # pd_op.unsqueeze: (1x64x1x49x49xf32) <- (64x1x49x49xf32, 1xi64) + unsqueeze_2 = paddle._C_ops.unsqueeze(unsqueeze_39, full_int_array_7) + del unsqueeze_39 + + # pd_op.add: (-1x64x4x49x49xf32) <- (-1x64x4x49x49xf32, 1x64x1x49x49xf32) + add_9 = paddle._C_ops.add(reshape_13, unsqueeze_2) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_9 = [slice_34, full_31, full_29, full_29] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_9 = paddle._C_ops.stack(combine_9, 0) + del combine_9 + + # pd_op.reshape: (-1x4x49x49xf32) <- (-1x64x4x49x49xf32, 4xi64) + reshape_221 = paddle._C_ops.reshape(add_9, stack_9) + del stack_9 + + # pd_op.softmax: (-1x4x49x49xf32) <- (-1x4x49x49xf32) + softmax_1 = paddle._C_ops.softmax(reshape_221, -1) + del reshape_221 + + # pd_op.matmul: (-1x4x49x32xf32) <- (-1x4x49x49xf32, -1x4x49x32xf32) + matmul_125 = paddle._C_ops.matmul(softmax_1, slice_1, False, False) + + # pd_op.transpose: (-1x49x4x32xf32) <- (-1x4x49x32xf32) + transpose_11 = paddle._C_ops.transpose(matmul_125, [0, 2, 1, 3]) + del matmul_125 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_10 = [slice_34, full_29, full_26] + del slice_34 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_10 = paddle._C_ops.stack(combine_10, 0) + del combine_10 + + # pd_op.reshape: (-1x49x128xf32) <- (-1x49x4x32xf32, 3xi64) + reshape_14 = paddle._C_ops.reshape(transpose_11, stack_10) + del stack_10 + + # pd_op.matmul: (-1x49x128xf32) <- (-1x49x128xf32, 128x128xf32) + matmul_7 = paddle._C_ops.matmul(reshape_14, parameter_284, False, False) + del parameter_284 + + # pd_op.add: (-1x49x128xf32) <- (-1x49x128xf32, 128xf32) + add_10 = paddle._C_ops.add(matmul_7, parameter_283) + del parameter_283 + + # pd_op.reshape: (-1x7x7x128xf32) <- (-1x49x128xf32, 4xi64) + reshape_15 = paddle._C_ops.reshape(add_10, full_int_array_10) + del full_int_array_10 + + # pd_op.reshape: (-1x8x8x7x7x128xf32) <- (-1x7x7x128xf32, 6xi64) + reshape_222 = paddle._C_ops.reshape(reshape_15, full_int_array_14) + del full_int_array_14 + + # pd_op.transpose: (-1x8x7x8x7x128xf32) <- (-1x8x8x7x7x128xf32) + transpose_12 = paddle._C_ops.transpose(reshape_222, [0, 1, 3, 2, 4, 5]) + del reshape_222 + + # pd_op.reshape: (-1x56x56x128xf32) <- (-1x8x7x8x7x128xf32, 4xi64) + reshape_16 = paddle._C_ops.reshape(transpose_12, full_int_array_15) + del full_int_array_15 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_3 = [3, 3] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_267 = full_int_array_3 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_234 = full_int_array_3 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_214 = full_int_array_3 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_194 = full_int_array_3 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_174 = full_int_array_3 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_154 = full_int_array_3 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_134 = full_int_array_3 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_114 = full_int_array_3 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_94 = full_int_array_3 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_74 = full_int_array_3 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_41 = full_int_array_3 + + # pd_op.roll: (-1x56x56x128xf32) <- (-1x56x56x128xf32, 2xi64) + roll_1 = paddle._C_ops.roll(reshape_16, full_int_array_3, [1, 2]) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_11 = [slice_31, full_33, full_26] + del full_33, slice_31 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_11 = paddle._C_ops.stack(combine_11, 0) + del combine_11 + + # pd_op.reshape: (-1x3136x128xf32) <- (-1x56x56x128xf32, 3xi64) + reshape_17 = paddle._C_ops.reshape(roll_1, stack_11) + del stack_11 + + # pd_op.full: (xf32) <- () + full_1 = paddle._C_ops.full( + [], + float("0.978261"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_13 = full_1 + + # pd_op.shape64: (3xi64) <- (-1x3136x128xf32) + shape64_9 = paddle._C_ops.shape64(reshape_17) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_37 = paddle._C_ops.slice( + shape64_9, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_9 + + # pd_op.full: (xi64) <- () + full_40 = paddle._C_ops.full( + [], float("1"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_12 = [slice_37, full_40, full_40] + del slice_37 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_12 = paddle._C_ops.stack(combine_12, 0) + del combine_12 + + # pd_op.full: (1xf32) <- () + full_41 = paddle._C_ops.full( + [1], float("0"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.full: (1xf32) <- () + full_42 = paddle._C_ops.full( + [1], float("1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_0 = paddle._C_ops.uniform( + stack_12, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_12 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_171 = paddle._C_ops.add(full_1, uniform_0) + del uniform_0 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_0 = paddle._C_ops.floor(add_171) + del add_171 + + # pd_op.divide: (-1x3136x128xf32) <- (-1x3136x128xf32, xf32) + divide_0 = paddle._C_ops.divide(reshape_17, full_1) + + # pd_op.multiply: (-1x3136x128xf32) <- (-1x3136x128xf32, -1x1x1xf32) + multiply_0 = paddle._C_ops.multiply(divide_0, floor_0) + + # pd_op.add: (-1x3136x128xf32) <- (-1x3136x128xf32, -1x3136x128xf32) + add_11 = paddle._C_ops.add(add_6, multiply_0) + + # pd_op.layer_norm: (-1x3136x128xf32, -1x3136xf32, -1x3136xf32) <- (-1x3136x128xf32, 128xf32, 128xf32) + layer_norm_12, layer_norm_13, layer_norm_14 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_11, parameter_282, parameter_281, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_281, parameter_282 + + # pd_op.matmul: (-1x3136x512xf32) <- (-1x3136x128xf32, 128x512xf32) + matmul_8 = paddle._C_ops.matmul(layer_norm_12, parameter_280, False, False) + del parameter_280 + + # pd_op.add: (-1x3136x512xf32) <- (-1x3136x512xf32, 512xf32) + add_12 = paddle._C_ops.add(matmul_8, parameter_279) + del parameter_279 + + # pd_op.gelu: (-1x3136x512xf32) <- (-1x3136x512xf32) + gelu_1 = paddle._C_ops.gelu(add_12, False) + + # pd_op.matmul: (-1x3136x128xf32) <- (-1x3136x512xf32, 512x128xf32) + matmul_9 = paddle._C_ops.matmul(gelu_1, parameter_278, False, False) + del parameter_278 + + # pd_op.add: (-1x3136x128xf32) <- (-1x3136x128xf32, 128xf32) + add_13 = paddle._C_ops.add(matmul_9, parameter_277) + del parameter_277 + + # pd_op.shape64: (3xi64) <- (-1x3136x128xf32) + shape64_10 = paddle._C_ops.shape64(add_13) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_38 = paddle._C_ops.slice( + shape64_10, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_10 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_13 = [slice_38, full_40, full_40] + del slice_38 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_13 = paddle._C_ops.stack(combine_13, 0) + del combine_13 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_1 = paddle._C_ops.uniform( + stack_13, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_13 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_172 = paddle._C_ops.add(full_1, uniform_1) + del uniform_1 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_1 = paddle._C_ops.floor(add_172) + del add_172 + + # pd_op.divide: (-1x3136x128xf32) <- (-1x3136x128xf32, xf32) + divide_1 = paddle._C_ops.divide(add_13, full_1) + + # pd_op.multiply: (-1x3136x128xf32) <- (-1x3136x128xf32, -1x1x1xf32) + multiply_1 = paddle._C_ops.multiply(divide_1, floor_1) + + # pd_op.add: (-1x3136x128xf32) <- (-1x3136x128xf32, -1x3136x128xf32) + add_14 = paddle._C_ops.add(add_11, multiply_1) + + # pd_op.shape64: (3xi64) <- (-1x3136x128xf32) + shape64_11 = paddle._C_ops.shape64(add_14) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_39 = paddle._C_ops.slice( + shape64_11, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_11 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_14 = [slice_39, full_25, full_25, full_26] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_14 = paddle._C_ops.stack(combine_14, 0) + del combine_14 + + # pd_op.reshape: (-1x56x56x128xf32) <- (-1x3136x128xf32, 4xi64) + reshape_18 = paddle._C_ops.reshape(add_14, stack_14) + del stack_14 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_4 = [2, 2] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_247 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_244 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_241 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_238 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_54 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_51 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_48 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_45 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_22 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_19 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_17 = full_int_array_4 + + # pd_op.strided_slice: (-1x28x28x128xf32) <- (-1x56x56x128xf32, 2xi64, 2xi64, 2xi64) + strided_slice_0 = paddle._C_ops.strided_slice( + reshape_18, [1, 2], full_int_array_16, full_int_array_29, full_int_array_4 + ) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_5 = [1, 0] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_239 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_46 = full_int_array_5 + + # pd_op.strided_slice: (-1x28x28x128xf32) <- (-1x56x56x128xf32, 2xi64, 2xi64, 2xi64) + strided_slice_1 = paddle._C_ops.strided_slice( + reshape_18, [1, 2], full_int_array_5, full_int_array_29, full_int_array_4 + ) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_6 = [0, 1] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_242 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_49 = full_int_array_6 + + # pd_op.strided_slice: (-1x28x28x128xf32) <- (-1x56x56x128xf32, 2xi64, 2xi64, 2xi64) + strided_slice_2 = paddle._C_ops.strided_slice( + reshape_18, [1, 2], full_int_array_6, full_int_array_29, full_int_array_4 + ) + + # pd_op.strided_slice: (-1x28x28x128xf32) <- (-1x56x56x128xf32, 2xi64, 2xi64, 2xi64) + strided_slice_3 = paddle._C_ops.strided_slice( + reshape_18, [1, 2], full_int_array_18, full_int_array_29, full_int_array_4 + ) + + # pd_op.shape64: (4xi64) <- (-1x56x56x128xf32) + shape64_12 = paddle._C_ops.shape64(reshape_18) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_40 = paddle._C_ops.slice( + shape64_12, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_12 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_15 = [slice_40, full_25, full_25, full_26] + del full_25, full_26, slice_40 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_15 = paddle._C_ops.stack(combine_15, 0) + del combine_15 + + # pd_op.reshape: (-1x56x56x128xf32) <- (-1x56x56x128xf32, 4xi64) + reshape_223 = paddle._C_ops.reshape(reshape_18, stack_15) + del stack_15 + + # pd_op.full: (1xi32) <- () + full_2 = paddle._C_ops.full( + [1], float("-1"), paddle.int32, paddle.core.CPUPlace() + ) + + # pd_op.assign: (1xi32) <- (1xi32) + assign_248 = full_2 + + # pd_op.assign: (1xi32) <- (1xi32) + assign_55 = full_2 + + # builtin.combine: ([-1x28x28x128xf32, -1x28x28x128xf32, -1x28x28x128xf32, -1x28x28x128xf32]) <- (-1x28x28x128xf32, -1x28x28x128xf32, -1x28x28x128xf32, -1x28x28x128xf32) + combine_16 = [ + strided_slice_0, + strided_slice_1, + strided_slice_2, + strided_slice_3, + ] + + # pd_op.concat: (-1x28x28x512xf32) <- ([-1x28x28x128xf32, -1x28x28x128xf32, -1x28x28x128xf32, -1x28x28x128xf32], 1xi32) + concat_0 = paddle._C_ops.concat(combine_16, full_2) + del combine_16 + + # pd_op.full: (xi64) <- () + full_43 = paddle._C_ops.full( + [], float("-1"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_44 = paddle._C_ops.full( + [], float("512"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_17 = [slice_39, full_43, full_44] + del slice_39 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_16 = paddle._C_ops.stack(combine_17, 0) + del combine_17 + + # pd_op.reshape: (-1x-1x512xf32) <- (-1x28x28x512xf32, 3xi64) + reshape_19 = paddle._C_ops.reshape(concat_0, stack_16) + del stack_16 + + # pd_op.layer_norm: (-1x-1x512xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x512xf32, 512xf32, 512xf32) + layer_norm_15, layer_norm_16, layer_norm_17 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + reshape_19, parameter_276, parameter_275, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_275, parameter_276 + + # pd_op.matmul: (-1x-1x256xf32) <- (-1x-1x512xf32, 512x256xf32) + matmul_10 = paddle._C_ops.matmul(layer_norm_15, parameter_274, False, False) + del parameter_274 + + # pd_op.shape64: (3xi64) <- (-1x-1x256xf32) + shape64_13 = paddle._C_ops.shape64(matmul_10) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_41 = paddle._C_ops.slice( + shape64_13, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_13 + + # pd_op.shape64: (3xi64) <- (-1x-1x256xf32) + shape64_14 = paddle._C_ops.shape64(matmul_10) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_42 = paddle._C_ops.slice( + shape64_14, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + del shape64_14 + + # pd_op.layer_norm: (-1x-1x256xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x256xf32, 256xf32, 256xf32) + layer_norm_18, layer_norm_19, layer_norm_20 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + matmul_10, parameter_273, parameter_272, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_272, parameter_273 + + # pd_op.full: (xi64) <- () + full_45 = paddle._C_ops.full( + [], float("28"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_46 = paddle._C_ops.full( + [], float("256"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_18 = [slice_41, full_45, full_45, full_46] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_17 = paddle._C_ops.stack(combine_18, 0) + del combine_18 + + # pd_op.reshape: (-1x28x28x256xf32) <- (-1x-1x256xf32, 4xi64) + reshape_20 = paddle._C_ops.reshape(layer_norm_18, stack_17) + del stack_17 + + # pd_op.shape64: (4xi64) <- (-1x28x28x256xf32) + shape64_15 = paddle._C_ops.shape64(reshape_20) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_43 = paddle._C_ops.slice( + shape64_15, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_15 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_19 = [slice_43, full_31, full_28, full_31, full_28, full_46] + del slice_43 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_18 = paddle._C_ops.stack(combine_19, 0) + del combine_19 + + # pd_op.reshape: (-1x4x7x4x7x256xf32) <- (-1x28x28x256xf32, 6xi64) + reshape_224 = paddle._C_ops.reshape(reshape_20, stack_18) + del stack_18 + + # pd_op.transpose: (-1x4x4x7x7x256xf32) <- (-1x4x7x4x7x256xf32) + transpose_13 = paddle._C_ops.transpose(reshape_224, [0, 1, 3, 2, 4, 5]) + del reshape_224 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_33 = [-1, 7, 7, 256] + + # pd_op.reshape: (-1x7x7x256xf32) <- (-1x4x4x7x7x256xf32, 4xi64) + reshape_21 = paddle._C_ops.reshape(transpose_13, full_int_array_33) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_34 = [-1, 49, 256] + + # pd_op.reshape: (-1x49x256xf32) <- (-1x7x7x256xf32, 3xi64) + reshape_22 = paddle._C_ops.reshape(reshape_21, full_int_array_34) + + # pd_op.shape64: (3xi64) <- (-1x49x256xf32) + shape64_16 = paddle._C_ops.shape64(reshape_22) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_44 = paddle._C_ops.slice( + shape64_16, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_16 + + # pd_op.matmul: (-1x49x768xf32) <- (-1x49x256xf32, 256x768xf32) + matmul_11 = paddle._C_ops.matmul(reshape_22, parameter_271, False, False) + del parameter_271 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, 768xf32) + add_15 = paddle._C_ops.add(matmul_11, parameter_270) + del parameter_270 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_20 = [slice_44, full_29, full_30, full_27, full_32] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_19 = paddle._C_ops.stack(combine_20, 0) + del combine_20 + + # pd_op.reshape: (-1x49x3x8x32xf32) <- (-1x49x768xf32, 5xi64) + reshape_225 = paddle._C_ops.reshape(add_15, stack_19) + del stack_19 + + # pd_op.transpose: (3x-1x8x49x32xf32) <- (-1x49x3x8x32xf32) + transpose_14 = paddle._C_ops.transpose(reshape_225, [2, 0, 3, 1, 4]) + del reshape_225 + + # pd_op.slice: (-1x8x49x32xf32) <- (3x-1x8x49x32xf32, 1xi64, 1xi64) + slice_45 = paddle._C_ops.slice( + transpose_14, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x8x49x32xf32) <- (3x-1x8x49x32xf32, 1xi64, 1xi64) + slice_46 = paddle._C_ops.slice( + transpose_14, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x8x49x32xf32) <- (3x-1x8x49x32xf32, 1xi64, 1xi64) + slice_2 = paddle._C_ops.slice( + transpose_14, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x8x49x32xf32) <- (-1x8x49x32xf32, 1xf32) + scale_2 = paddle._C_ops.scale(slice_45, full_0, float("0"), True) + del slice_45 + + # pd_op.transpose: (-1x8x32x49xf32) <- (-1x8x49x32xf32) + transpose_15 = paddle._C_ops.transpose(slice_46, [0, 1, 3, 2]) + del slice_46 + + # pd_op.matmul: (-1x8x49x49xf32) <- (-1x8x49x32xf32, -1x8x32x49xf32) + matmul_12 = paddle._C_ops.matmul(scale_2, transpose_15, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_23 = paddle._C_ops.reshape(data_5, full_int_array_12) + del data_5 + + # pd_op.index_select: (2401x8xf32) <- (169x8xf32, 2401xi64) + index_select_2 = paddle._C_ops.index_select(data_6, reshape_23, 0) + del data_6 + + # pd_op.reshape: (49x49x8xf32) <- (2401x8xf32, 3xi64) + reshape_226 = paddle._C_ops.reshape(index_select_2, full_int_array_13) + + # pd_op.transpose: (8x49x49xf32) <- (49x49x8xf32) + transpose_16 = paddle._C_ops.transpose(reshape_226, [2, 0, 1]) + del reshape_226 + + # pd_op.unsqueeze: (1x8x49x49xf32) <- (8x49x49xf32, 1xi64) + unsqueeze_3 = paddle._C_ops.unsqueeze(transpose_16, full_int_array_7) + + # pd_op.add: (-1x8x49x49xf32) <- (-1x8x49x49xf32, 1x8x49x49xf32) + add_173 = paddle._C_ops.add(matmul_12, unsqueeze_3) + + # pd_op.softmax: (-1x8x49x49xf32) <- (-1x8x49x49xf32) + softmax_2 = paddle._C_ops.softmax(add_173, -1) + del add_173 + + # pd_op.matmul: (-1x8x49x32xf32) <- (-1x8x49x49xf32, -1x8x49x32xf32) + matmul_126 = paddle._C_ops.matmul(softmax_2, slice_2, False, False) + + # pd_op.transpose: (-1x49x8x32xf32) <- (-1x8x49x32xf32) + transpose_17 = paddle._C_ops.transpose(matmul_126, [0, 2, 1, 3]) + del matmul_126 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_21 = [slice_44, full_29, full_46] + del slice_44 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_20 = paddle._C_ops.stack(combine_21, 0) + del combine_21 + + # pd_op.reshape: (-1x49x256xf32) <- (-1x49x8x32xf32, 3xi64) + reshape_24 = paddle._C_ops.reshape(transpose_17, stack_20) + del stack_20 + + # pd_op.matmul: (-1x49x256xf32) <- (-1x49x256xf32, 256x256xf32) + matmul_13 = paddle._C_ops.matmul(reshape_24, parameter_269, False, False) + del parameter_269 + + # pd_op.add: (-1x49x256xf32) <- (-1x49x256xf32, 256xf32) + add_16 = paddle._C_ops.add(matmul_13, parameter_268) + del parameter_268 + + # pd_op.reshape: (-1x7x7x256xf32) <- (-1x49x256xf32, 4xi64) + reshape_25 = paddle._C_ops.reshape(add_16, full_int_array_33) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_35 = [-1, 4, 4, 7, 7, 256] + + # pd_op.reshape: (-1x4x4x7x7x256xf32) <- (-1x7x7x256xf32, 6xi64) + reshape_227 = paddle._C_ops.reshape(reshape_25, full_int_array_35) + + # pd_op.transpose: (-1x4x7x4x7x256xf32) <- (-1x4x4x7x7x256xf32) + transpose_18 = paddle._C_ops.transpose(reshape_227, [0, 1, 3, 2, 4, 5]) + del reshape_227 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_36 = [-1, 28, 28, 256] + + # pd_op.reshape: (-1x28x28x256xf32) <- (-1x4x7x4x7x256xf32, 4xi64) + reshape_26 = paddle._C_ops.reshape(transpose_18, full_int_array_36) + + # pd_op.full: (xi64) <- () + full_47 = paddle._C_ops.full( + [], float("784"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_22 = [slice_41, full_47, full_46] + del slice_41 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_21 = paddle._C_ops.stack(combine_22, 0) + del combine_22 + + # pd_op.reshape: (-1x784x256xf32) <- (-1x28x28x256xf32, 3xi64) + reshape_27 = paddle._C_ops.reshape(reshape_26, stack_21) + del stack_21 + + # pd_op.full: (xf32) <- () + full_3 = paddle._C_ops.full( + [], + float("0.956522"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_31 = full_3 + + # pd_op.shape64: (3xi64) <- (-1x784x256xf32) + shape64_17 = paddle._C_ops.shape64(reshape_27) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_47 = paddle._C_ops.slice( + shape64_17, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_17 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_23 = [slice_47, full_40, full_40] + del slice_47 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_22 = paddle._C_ops.stack(combine_23, 0) + del combine_23 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_2 = paddle._C_ops.uniform( + stack_22, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_22 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_174 = paddle._C_ops.add(full_3, uniform_2) + del uniform_2 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_2 = paddle._C_ops.floor(add_174) + del add_174 + + # pd_op.divide: (-1x784x256xf32) <- (-1x784x256xf32, xf32) + divide_2 = paddle._C_ops.divide(reshape_27, full_3) + + # pd_op.multiply: (-1x784x256xf32) <- (-1x784x256xf32, -1x1x1xf32) + multiply_2 = paddle._C_ops.multiply(divide_2, floor_2) + + # pd_op.add: (-1x784x256xf32) <- (-1x-1x256xf32, -1x784x256xf32) + add_17 = paddle._C_ops.add(matmul_10, multiply_2) + + # pd_op.layer_norm: (-1x784x256xf32, -1x784xf32, -1x784xf32) <- (-1x784x256xf32, 256xf32, 256xf32) + layer_norm_21, layer_norm_22, layer_norm_23 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_17, parameter_267, parameter_266, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_266, parameter_267 + + # pd_op.matmul: (-1x784x1024xf32) <- (-1x784x256xf32, 256x1024xf32) + matmul_14 = paddle._C_ops.matmul(layer_norm_21, parameter_265, False, False) + del parameter_265 + + # pd_op.add: (-1x784x1024xf32) <- (-1x784x1024xf32, 1024xf32) + add_18 = paddle._C_ops.add(matmul_14, parameter_264) + del parameter_264 + + # pd_op.gelu: (-1x784x1024xf32) <- (-1x784x1024xf32) + gelu_2 = paddle._C_ops.gelu(add_18, False) + + # pd_op.matmul: (-1x784x256xf32) <- (-1x784x1024xf32, 1024x256xf32) + matmul_15 = paddle._C_ops.matmul(gelu_2, parameter_263, False, False) + del parameter_263 + + # pd_op.add: (-1x784x256xf32) <- (-1x784x256xf32, 256xf32) + add_19 = paddle._C_ops.add(matmul_15, parameter_262) + del parameter_262 + + # pd_op.shape64: (3xi64) <- (-1x784x256xf32) + shape64_18 = paddle._C_ops.shape64(add_19) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_48 = paddle._C_ops.slice( + shape64_18, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_18 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_24 = [slice_48, full_40, full_40] + del slice_48 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_23 = paddle._C_ops.stack(combine_24, 0) + del combine_24 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_3 = paddle._C_ops.uniform( + stack_23, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_23 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_175 = paddle._C_ops.add(full_3, uniform_3) + del uniform_3 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_3 = paddle._C_ops.floor(add_175) + del add_175 + + # pd_op.divide: (-1x784x256xf32) <- (-1x784x256xf32, xf32) + divide_3 = paddle._C_ops.divide(add_19, full_3) + + # pd_op.multiply: (-1x784x256xf32) <- (-1x784x256xf32, -1x1x1xf32) + multiply_3 = paddle._C_ops.multiply(divide_3, floor_3) + + # pd_op.add: (-1x784x256xf32) <- (-1x784x256xf32, -1x784x256xf32) + add_20 = paddle._C_ops.add(add_17, multiply_3) + + # pd_op.shape64: (3xi64) <- (-1x784x256xf32) + shape64_19 = paddle._C_ops.shape64(add_20) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_49 = paddle._C_ops.slice( + shape64_19, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_19 + + # pd_op.layer_norm: (-1x784x256xf32, -1x784xf32, -1x784xf32) <- (-1x784x256xf32, 256xf32, 256xf32) + layer_norm_24, layer_norm_25, layer_norm_26 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_20, parameter_261, parameter_260, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_260, parameter_261 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_25 = [slice_49, full_45, full_45, full_46] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_24 = paddle._C_ops.stack(combine_25, 0) + del combine_25 + + # pd_op.reshape: (-1x28x28x256xf32) <- (-1x784x256xf32, 4xi64) + reshape_28 = paddle._C_ops.reshape(layer_norm_24, stack_24) + del stack_24 + + # pd_op.shape64: (4xi64) <- (-1x28x28x256xf32) + shape64_20 = paddle._C_ops.shape64(reshape_28) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_50 = paddle._C_ops.slice( + shape64_20, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_20 + + # pd_op.roll: (-1x28x28x256xf32) <- (-1x28x28x256xf32, 2xi64) + roll_2 = paddle._C_ops.roll(reshape_28, full_int_array_2, [1, 2]) + + # pd_op.shape64: (4xi64) <- (-1x28x28x256xf32) + shape64_21 = paddle._C_ops.shape64(roll_2) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_51 = paddle._C_ops.slice( + shape64_21, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_21 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_26 = [slice_51, full_31, full_28, full_31, full_28, full_46] + del slice_51 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_25 = paddle._C_ops.stack(combine_26, 0) + del combine_26 + + # pd_op.reshape: (-1x4x7x4x7x256xf32) <- (-1x28x28x256xf32, 6xi64) + reshape_228 = paddle._C_ops.reshape(roll_2, stack_25) + del stack_25 + + # pd_op.transpose: (-1x4x4x7x7x256xf32) <- (-1x4x7x4x7x256xf32) + transpose_19 = paddle._C_ops.transpose(reshape_228, [0, 1, 3, 2, 4, 5]) + del reshape_228 + + # pd_op.reshape: (-1x7x7x256xf32) <- (-1x4x4x7x7x256xf32, 4xi64) + reshape_29 = paddle._C_ops.reshape(transpose_19, full_int_array_33) + + # pd_op.reshape: (-1x49x256xf32) <- (-1x7x7x256xf32, 3xi64) + reshape_30 = paddle._C_ops.reshape(reshape_29, full_int_array_34) + del full_int_array_34 + + # pd_op.full: (1x28x28x1xf32) <- () + full_48 = paddle._C_ops.full( + [1, 28, 28, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__20 = paddle._C_ops.set_value_( + full_48, + full_int_array_16, + full_int_array_17, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_48 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__21 = paddle._C_ops.set_value_( + set_value__20, + full_int_array_19, + full_int_array_20, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__20 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__22 = paddle._C_ops.set_value_( + set_value__21, + full_int_array_21, + full_int_array_22, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__21 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__23 = paddle._C_ops.set_value_( + set_value__22, + full_int_array_23, + full_int_array_24, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__22 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__24 = paddle._C_ops.set_value_( + set_value__23, + full_int_array_17, + full_int_array_2, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__23 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__25 = paddle._C_ops.set_value_( + set_value__24, + full_int_array_20, + full_int_array_25, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__24 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__26 = paddle._C_ops.set_value_( + set_value__25, + full_int_array_26, + full_int_array_27, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__25 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__27 = paddle._C_ops.set_value_( + set_value__26, + full_int_array_24, + full_int_array_28, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__26 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__1 = paddle._C_ops.set_value_( + set_value__27, + full_int_array_2, + full_int_array_29, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__27 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_37 = [1, 4, 7, 4, 7, 1] + + # pd_op.reshape: (1x4x7x4x7x1xf32) <- (1x28x28x1xf32, 6xi64) + reshape_229 = paddle._C_ops.reshape(set_value__1, full_int_array_37) + del full_int_array_37 + + # pd_op.transpose: (1x4x4x7x7x1xf32) <- (1x4x7x4x7x1xf32) + transpose_147 = paddle._C_ops.transpose(reshape_229, [0, 1, 3, 2, 4, 5]) + del reshape_229 + + # pd_op.reshape: (16x7x7x1xf32) <- (1x4x4x7x7x1xf32, 4xi64) + reshape_230 = paddle._C_ops.reshape(transpose_147, full_int_array_31) + del transpose_147 + + # pd_op.reshape: (16x49xf32) <- (16x7x7x1xf32, 2xi64) + reshape_231 = paddle._C_ops.reshape(reshape_230, full_int_array_32) + del reshape_230 + + # pd_op.unsqueeze: (16x1x49xf32) <- (16x49xf32, 1xi64) + unsqueeze_40 = paddle._C_ops.unsqueeze(reshape_231, full_int_array_8) + + # pd_op.unsqueeze: (16x49x1xf32) <- (16x49xf32, 1xi64) + unsqueeze_41 = paddle._C_ops.unsqueeze(reshape_231, full_int_array_0) + del reshape_231 + + # pd_op.subtract: (16x49x49xf32) <- (16x1x49xf32, 16x49x1xf32) + subtract_1 = paddle._C_ops.subtract(unsqueeze_40, unsqueeze_41) + del unsqueeze_40, unsqueeze_41 + + # pd_op.not_equal: (16x49x49xb) <- (16x49x49xf32, xf32) + not_equal_1 = paddle._C_ops.not_equal(subtract_1, full_35) + + # pd_op.full: (16x49x49xf32) <- () + full_49 = paddle._C_ops.full( + [16, 49, 49], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (16x49x49xf32) <- (16x49x49xb, 16x49x49xf32, 16x49x49xf32) + where_2 = paddle._C_ops.where(not_equal_1, full_49, subtract_1) + del full_49, not_equal_1, subtract_1 + + # pd_op.equal: (16x49x49xb) <- (16x49x49xf32, xf32) + equal_1 = paddle._C_ops.equal(where_2, full_35) + + # pd_op.full: (16x49x49xf32) <- () + full_50 = paddle._C_ops.full( + [16, 49, 49], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (16x49x49xf32) <- (16x49x49xb, 16x49x49xf32, 16x49x49xf32) + where_3 = paddle._C_ops.where(equal_1, full_50, where_2) + del equal_1, full_50, where_2 + + # pd_op.shape64: (3xi64) <- (-1x49x256xf32) + shape64_22 = paddle._C_ops.shape64(reshape_30) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_52 = paddle._C_ops.slice( + shape64_22, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_22 + + # pd_op.matmul: (-1x49x768xf32) <- (-1x49x256xf32, 256x768xf32) + matmul_16 = paddle._C_ops.matmul(reshape_30, parameter_259, False, False) + del parameter_259 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, 768xf32) + add_21 = paddle._C_ops.add(matmul_16, parameter_258) + del parameter_258 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_27 = [slice_52, full_29, full_30, full_27, full_32] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_26 = paddle._C_ops.stack(combine_27, 0) + del combine_27 + + # pd_op.reshape: (-1x49x3x8x32xf32) <- (-1x49x768xf32, 5xi64) + reshape_232 = paddle._C_ops.reshape(add_21, stack_26) + del stack_26 + + # pd_op.transpose: (3x-1x8x49x32xf32) <- (-1x49x3x8x32xf32) + transpose_20 = paddle._C_ops.transpose(reshape_232, [2, 0, 3, 1, 4]) + del reshape_232 + + # pd_op.slice: (-1x8x49x32xf32) <- (3x-1x8x49x32xf32, 1xi64, 1xi64) + slice_53 = paddle._C_ops.slice( + transpose_20, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x8x49x32xf32) <- (3x-1x8x49x32xf32, 1xi64, 1xi64) + slice_54 = paddle._C_ops.slice( + transpose_20, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x8x49x32xf32) <- (3x-1x8x49x32xf32, 1xi64, 1xi64) + slice_3 = paddle._C_ops.slice( + transpose_20, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x8x49x32xf32) <- (-1x8x49x32xf32, 1xf32) + scale_3 = paddle._C_ops.scale(slice_53, full_0, float("0"), True) + del slice_53 + + # pd_op.transpose: (-1x8x32x49xf32) <- (-1x8x49x32xf32) + transpose_21 = paddle._C_ops.transpose(slice_54, [0, 1, 3, 2]) + del slice_54 + + # pd_op.matmul: (-1x8x49x49xf32) <- (-1x8x49x32xf32, -1x8x32x49xf32) + matmul_17 = paddle._C_ops.matmul(scale_3, transpose_21, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_31 = paddle._C_ops.reshape(data_7, full_int_array_12) + del data_7 + + # pd_op.index_select: (2401x8xf32) <- (169x8xf32, 2401xi64) + index_select_3 = paddle._C_ops.index_select(data_8, reshape_31, 0) + del data_8 + + # pd_op.reshape: (49x49x8xf32) <- (2401x8xf32, 3xi64) + reshape_233 = paddle._C_ops.reshape(index_select_3, full_int_array_13) + + # pd_op.transpose: (8x49x49xf32) <- (49x49x8xf32) + transpose_22 = paddle._C_ops.transpose(reshape_233, [2, 0, 1]) + del reshape_233 + + # pd_op.unsqueeze: (1x8x49x49xf32) <- (8x49x49xf32, 1xi64) + unsqueeze_4 = paddle._C_ops.unsqueeze(transpose_22, full_int_array_7) + + # pd_op.add: (-1x8x49x49xf32) <- (-1x8x49x49xf32, 1x8x49x49xf32) + add_22 = paddle._C_ops.add(matmul_17, unsqueeze_4) + + # pd_op.full: (xi64) <- () + full_51 = paddle._C_ops.full( + [], float("16"), paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_1 = paddle._C_ops.floor_divide(slice_52, full_51) + del full_51 + + # pd_op.full: (xi64) <- () + full_52 = paddle._C_ops.full( + [], float("16"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_28 = [floor_divide_1, full_52, full_27, full_29, full_29] + del floor_divide_1 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_27 = paddle._C_ops.stack(combine_28, 0) + del combine_28 + + # pd_op.reshape: (-1x16x8x49x49xf32) <- (-1x8x49x49xf32, 5xi64) + reshape_32 = paddle._C_ops.reshape(add_22, stack_27) + del stack_27 + + # pd_op.unsqueeze: (16x1x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_42 = paddle._C_ops.unsqueeze(where_3, full_int_array_8) + del where_3 + + # pd_op.unsqueeze: (1x16x1x49x49xf32) <- (16x1x49x49xf32, 1xi64) + unsqueeze_5 = paddle._C_ops.unsqueeze(unsqueeze_42, full_int_array_7) + del unsqueeze_42 + + # pd_op.add: (-1x16x8x49x49xf32) <- (-1x16x8x49x49xf32, 1x16x1x49x49xf32) + add_23 = paddle._C_ops.add(reshape_32, unsqueeze_5) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_29 = [slice_52, full_27, full_29, full_29] + del full_27 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_28 = paddle._C_ops.stack(combine_29, 0) + del combine_29 + + # pd_op.reshape: (-1x8x49x49xf32) <- (-1x16x8x49x49xf32, 4xi64) + reshape_234 = paddle._C_ops.reshape(add_23, stack_28) + del stack_28 + + # pd_op.softmax: (-1x8x49x49xf32) <- (-1x8x49x49xf32) + softmax_3 = paddle._C_ops.softmax(reshape_234, -1) + del reshape_234 + + # pd_op.matmul: (-1x8x49x32xf32) <- (-1x8x49x49xf32, -1x8x49x32xf32) + matmul_127 = paddle._C_ops.matmul(softmax_3, slice_3, False, False) + + # pd_op.transpose: (-1x49x8x32xf32) <- (-1x8x49x32xf32) + transpose_23 = paddle._C_ops.transpose(matmul_127, [0, 2, 1, 3]) + del matmul_127 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_30 = [slice_52, full_29, full_46] + del slice_52 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_29 = paddle._C_ops.stack(combine_30, 0) + del combine_30 + + # pd_op.reshape: (-1x49x256xf32) <- (-1x49x8x32xf32, 3xi64) + reshape_33 = paddle._C_ops.reshape(transpose_23, stack_29) + del stack_29 + + # pd_op.matmul: (-1x49x256xf32) <- (-1x49x256xf32, 256x256xf32) + matmul_18 = paddle._C_ops.matmul(reshape_33, parameter_257, False, False) + del parameter_257 + + # pd_op.add: (-1x49x256xf32) <- (-1x49x256xf32, 256xf32) + add_24 = paddle._C_ops.add(matmul_18, parameter_256) + del parameter_256 + + # pd_op.reshape: (-1x7x7x256xf32) <- (-1x49x256xf32, 4xi64) + reshape_34 = paddle._C_ops.reshape(add_24, full_int_array_33) + del full_int_array_33 + + # pd_op.reshape: (-1x4x4x7x7x256xf32) <- (-1x7x7x256xf32, 6xi64) + reshape_235 = paddle._C_ops.reshape(reshape_34, full_int_array_35) + del full_int_array_35 + + # pd_op.transpose: (-1x4x7x4x7x256xf32) <- (-1x4x4x7x7x256xf32) + transpose_24 = paddle._C_ops.transpose(reshape_235, [0, 1, 3, 2, 4, 5]) + del reshape_235 + + # pd_op.reshape: (-1x28x28x256xf32) <- (-1x4x7x4x7x256xf32, 4xi64) + reshape_35 = paddle._C_ops.reshape(transpose_24, full_int_array_36) + del full_int_array_36 + + # pd_op.roll: (-1x28x28x256xf32) <- (-1x28x28x256xf32, 2xi64) + roll_3 = paddle._C_ops.roll(reshape_35, full_int_array_3, [1, 2]) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_31 = [slice_49, full_47, full_46] + del full_47, slice_49 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_30 = paddle._C_ops.stack(combine_31, 0) + del combine_31 + + # pd_op.reshape: (-1x784x256xf32) <- (-1x28x28x256xf32, 3xi64) + reshape_36 = paddle._C_ops.reshape(roll_3, stack_30) + del stack_30 + + # pd_op.full: (xf32) <- () + full_4 = paddle._C_ops.full( + [], + float("0.934783"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_42 = full_4 + + # pd_op.shape64: (3xi64) <- (-1x784x256xf32) + shape64_23 = paddle._C_ops.shape64(reshape_36) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_55 = paddle._C_ops.slice( + shape64_23, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_23 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_32 = [slice_55, full_40, full_40] + del slice_55 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_31 = paddle._C_ops.stack(combine_32, 0) + del combine_32 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_4 = paddle._C_ops.uniform( + stack_31, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_31 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_176 = paddle._C_ops.add(full_4, uniform_4) + del uniform_4 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_4 = paddle._C_ops.floor(add_176) + del add_176 + + # pd_op.divide: (-1x784x256xf32) <- (-1x784x256xf32, xf32) + divide_4 = paddle._C_ops.divide(reshape_36, full_4) + + # pd_op.multiply: (-1x784x256xf32) <- (-1x784x256xf32, -1x1x1xf32) + multiply_4 = paddle._C_ops.multiply(divide_4, floor_4) + + # pd_op.add: (-1x784x256xf32) <- (-1x784x256xf32, -1x784x256xf32) + add_25 = paddle._C_ops.add(add_20, multiply_4) + + # pd_op.layer_norm: (-1x784x256xf32, -1x784xf32, -1x784xf32) <- (-1x784x256xf32, 256xf32, 256xf32) + layer_norm_27, layer_norm_28, layer_norm_29 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_25, parameter_255, parameter_254, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_254, parameter_255 + + # pd_op.matmul: (-1x784x1024xf32) <- (-1x784x256xf32, 256x1024xf32) + matmul_19 = paddle._C_ops.matmul(layer_norm_27, parameter_253, False, False) + del parameter_253 + + # pd_op.add: (-1x784x1024xf32) <- (-1x784x1024xf32, 1024xf32) + add_26 = paddle._C_ops.add(matmul_19, parameter_252) + del parameter_252 + + # pd_op.gelu: (-1x784x1024xf32) <- (-1x784x1024xf32) + gelu_3 = paddle._C_ops.gelu(add_26, False) + + # pd_op.matmul: (-1x784x256xf32) <- (-1x784x1024xf32, 1024x256xf32) + matmul_20 = paddle._C_ops.matmul(gelu_3, parameter_251, False, False) + del parameter_251 + + # pd_op.add: (-1x784x256xf32) <- (-1x784x256xf32, 256xf32) + add_27 = paddle._C_ops.add(matmul_20, parameter_250) + del parameter_250 + + # pd_op.shape64: (3xi64) <- (-1x784x256xf32) + shape64_24 = paddle._C_ops.shape64(add_27) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_56 = paddle._C_ops.slice( + shape64_24, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_24 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_33 = [slice_56, full_40, full_40] + del slice_56 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_32 = paddle._C_ops.stack(combine_33, 0) + del combine_33 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_5 = paddle._C_ops.uniform( + stack_32, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_32 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_177 = paddle._C_ops.add(full_4, uniform_5) + del uniform_5 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_5 = paddle._C_ops.floor(add_177) + del add_177 + + # pd_op.divide: (-1x784x256xf32) <- (-1x784x256xf32, xf32) + divide_5 = paddle._C_ops.divide(add_27, full_4) + + # pd_op.multiply: (-1x784x256xf32) <- (-1x784x256xf32, -1x1x1xf32) + multiply_5 = paddle._C_ops.multiply(divide_5, floor_5) + + # pd_op.add: (-1x784x256xf32) <- (-1x784x256xf32, -1x784x256xf32) + add_28 = paddle._C_ops.add(add_25, multiply_5) + + # pd_op.shape64: (3xi64) <- (-1x784x256xf32) + shape64_25 = paddle._C_ops.shape64(add_28) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_57 = paddle._C_ops.slice( + shape64_25, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_25 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_34 = [slice_57, full_45, full_45, full_46] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_33 = paddle._C_ops.stack(combine_34, 0) + del combine_34 + + # pd_op.reshape: (-1x28x28x256xf32) <- (-1x784x256xf32, 4xi64) + reshape_37 = paddle._C_ops.reshape(add_28, stack_33) + del stack_33 + + # pd_op.strided_slice: (-1x14x14x256xf32) <- (-1x28x28x256xf32, 2xi64, 2xi64, 2xi64) + strided_slice_4 = paddle._C_ops.strided_slice( + reshape_37, [1, 2], full_int_array_16, full_int_array_29, full_int_array_4 + ) + + # pd_op.strided_slice: (-1x14x14x256xf32) <- (-1x28x28x256xf32, 2xi64, 2xi64, 2xi64) + strided_slice_5 = paddle._C_ops.strided_slice( + reshape_37, [1, 2], full_int_array_5, full_int_array_29, full_int_array_4 + ) + + # pd_op.strided_slice: (-1x14x14x256xf32) <- (-1x28x28x256xf32, 2xi64, 2xi64, 2xi64) + strided_slice_6 = paddle._C_ops.strided_slice( + reshape_37, [1, 2], full_int_array_6, full_int_array_29, full_int_array_4 + ) + + # pd_op.strided_slice: (-1x14x14x256xf32) <- (-1x28x28x256xf32, 2xi64, 2xi64, 2xi64) + strided_slice_7 = paddle._C_ops.strided_slice( + reshape_37, [1, 2], full_int_array_18, full_int_array_29, full_int_array_4 + ) + + # pd_op.shape64: (4xi64) <- (-1x28x28x256xf32) + shape64_26 = paddle._C_ops.shape64(reshape_37) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_58 = paddle._C_ops.slice( + shape64_26, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_26 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_35 = [slice_58, full_45, full_45, full_46] + del full_45, full_46, slice_58 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_34 = paddle._C_ops.stack(combine_35, 0) + del combine_35 + + # pd_op.reshape: (-1x28x28x256xf32) <- (-1x28x28x256xf32, 4xi64) + reshape_236 = paddle._C_ops.reshape(reshape_37, stack_34) + del stack_34 + + # builtin.combine: ([-1x14x14x256xf32, -1x14x14x256xf32, -1x14x14x256xf32, -1x14x14x256xf32]) <- (-1x14x14x256xf32, -1x14x14x256xf32, -1x14x14x256xf32, -1x14x14x256xf32) + combine_36 = [ + strided_slice_4, + strided_slice_5, + strided_slice_6, + strided_slice_7, + ] + + # pd_op.concat: (-1x14x14x1024xf32) <- ([-1x14x14x256xf32, -1x14x14x256xf32, -1x14x14x256xf32, -1x14x14x256xf32], 1xi32) + concat_1 = paddle._C_ops.concat(combine_36, full_2) + del combine_36 + + # pd_op.full: (xi64) <- () + full_53 = paddle._C_ops.full( + [], float("1024"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_37 = [slice_57, full_43, full_53] + del slice_57 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_35 = paddle._C_ops.stack(combine_37, 0) + del combine_37 + + # pd_op.reshape: (-1x-1x1024xf32) <- (-1x14x14x1024xf32, 3xi64) + reshape_38 = paddle._C_ops.reshape(concat_1, stack_35) + del stack_35 + + # pd_op.layer_norm: (-1x-1x1024xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x1024xf32, 1024xf32, 1024xf32) + layer_norm_30, layer_norm_31, layer_norm_32 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + reshape_38, parameter_249, parameter_248, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_248, parameter_249 + + # pd_op.matmul: (-1x-1x512xf32) <- (-1x-1x1024xf32, 1024x512xf32) + matmul_21 = paddle._C_ops.matmul(layer_norm_30, parameter_247, False, False) + del parameter_247 + + # pd_op.shape64: (3xi64) <- (-1x-1x512xf32) + shape64_27 = paddle._C_ops.shape64(matmul_21) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_59 = paddle._C_ops.slice( + shape64_27, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_27 + + # pd_op.shape64: (3xi64) <- (-1x-1x512xf32) + shape64_28 = paddle._C_ops.shape64(matmul_21) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_60 = paddle._C_ops.slice( + shape64_28, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + del shape64_28 + + # pd_op.layer_norm: (-1x-1x512xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x512xf32, 512xf32, 512xf32) + layer_norm_33, layer_norm_34, layer_norm_35 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + matmul_21, parameter_246, parameter_245, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_245, parameter_246 + + # pd_op.full: (xi64) <- () + full_54 = paddle._C_ops.full( + [], float("14"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_38 = [slice_59, full_54, full_54, full_44] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_36 = paddle._C_ops.stack(combine_38, 0) + del combine_38 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x-1x512xf32, 4xi64) + reshape_39 = paddle._C_ops.reshape(layer_norm_33, stack_36) + del stack_36 + + # pd_op.shape64: (4xi64) <- (-1x14x14x512xf32) + shape64_29 = paddle._C_ops.shape64(reshape_39) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_61 = paddle._C_ops.slice( + shape64_29, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_29 + + # pd_op.full: (xi64) <- () + full_55 = paddle._C_ops.full( + [], float("2"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_39 = [slice_61, full_55, full_28, full_55, full_28, full_44] + del slice_61 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_37 = paddle._C_ops.stack(combine_39, 0) + del combine_39 + + # pd_op.reshape: (-1x2x7x2x7x512xf32) <- (-1x14x14x512xf32, 6xi64) + reshape_237 = paddle._C_ops.reshape(reshape_39, stack_37) + del stack_37 + + # pd_op.transpose: (-1x2x2x7x7x512xf32) <- (-1x2x7x2x7x512xf32) + transpose_25 = paddle._C_ops.transpose(reshape_237, [0, 1, 3, 2, 4, 5]) + del reshape_237 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_38 = [-1, 7, 7, 512] + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x2x2x7x7x512xf32, 4xi64) + reshape_40 = paddle._C_ops.reshape(transpose_25, full_int_array_38) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_39 = [-1, 49, 512] + + # pd_op.reshape: (-1x49x512xf32) <- (-1x7x7x512xf32, 3xi64) + reshape_41 = paddle._C_ops.reshape(reshape_40, full_int_array_39) + + # pd_op.shape64: (3xi64) <- (-1x49x512xf32) + shape64_30 = paddle._C_ops.shape64(reshape_41) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_62 = paddle._C_ops.slice( + shape64_30, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_30 + + # pd_op.matmul: (-1x49x1536xf32) <- (-1x49x512xf32, 512x1536xf32) + matmul_22 = paddle._C_ops.matmul(reshape_41, parameter_244, False, False) + del parameter_244 + + # pd_op.add: (-1x49x1536xf32) <- (-1x49x1536xf32, 1536xf32) + add_29 = paddle._C_ops.add(matmul_22, parameter_243) + del parameter_243 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_40 = [slice_62, full_29, full_30, full_52, full_32] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_38 = paddle._C_ops.stack(combine_40, 0) + del combine_40 + + # pd_op.reshape: (-1x49x3x16x32xf32) <- (-1x49x1536xf32, 5xi64) + reshape_238 = paddle._C_ops.reshape(add_29, stack_38) + del stack_38 + + # pd_op.transpose: (3x-1x16x49x32xf32) <- (-1x49x3x16x32xf32) + transpose_26 = paddle._C_ops.transpose(reshape_238, [2, 0, 3, 1, 4]) + del reshape_238 + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_63 = paddle._C_ops.slice( + transpose_26, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_64 = paddle._C_ops.slice( + transpose_26, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_4 = paddle._C_ops.slice( + transpose_26, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x16x49x32xf32) <- (-1x16x49x32xf32, 1xf32) + scale_4 = paddle._C_ops.scale(slice_63, full_0, float("0"), True) + del slice_63 + + # pd_op.transpose: (-1x16x32x49xf32) <- (-1x16x49x32xf32) + transpose_27 = paddle._C_ops.transpose(slice_64, [0, 1, 3, 2]) + del slice_64 + + # pd_op.matmul: (-1x16x49x49xf32) <- (-1x16x49x32xf32, -1x16x32x49xf32) + matmul_23 = paddle._C_ops.matmul(scale_4, transpose_27, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_42 = paddle._C_ops.reshape(data_9, full_int_array_12) + del data_9 + + # pd_op.index_select: (2401x16xf32) <- (169x16xf32, 2401xi64) + index_select_4 = paddle._C_ops.index_select(data_10, reshape_42, 0) + del data_10 + + # pd_op.reshape: (49x49x16xf32) <- (2401x16xf32, 3xi64) + reshape_239 = paddle._C_ops.reshape(index_select_4, full_int_array_13) + + # pd_op.transpose: (16x49x49xf32) <- (49x49x16xf32) + transpose_28 = paddle._C_ops.transpose(reshape_239, [2, 0, 1]) + del reshape_239 + + # pd_op.unsqueeze: (1x16x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_6 = paddle._C_ops.unsqueeze(transpose_28, full_int_array_7) + + # pd_op.add: (-1x16x49x49xf32) <- (-1x16x49x49xf32, 1x16x49x49xf32) + add_178 = paddle._C_ops.add(matmul_23, unsqueeze_6) + + # pd_op.softmax: (-1x16x49x49xf32) <- (-1x16x49x49xf32) + softmax_4 = paddle._C_ops.softmax(add_178, -1) + del add_178 + + # pd_op.matmul: (-1x16x49x32xf32) <- (-1x16x49x49xf32, -1x16x49x32xf32) + matmul_128 = paddle._C_ops.matmul(softmax_4, slice_4, False, False) + + # pd_op.transpose: (-1x49x16x32xf32) <- (-1x16x49x32xf32) + transpose_29 = paddle._C_ops.transpose(matmul_128, [0, 2, 1, 3]) + del matmul_128 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_41 = [slice_62, full_29, full_44] + del slice_62 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_39 = paddle._C_ops.stack(combine_41, 0) + del combine_41 + + # pd_op.reshape: (-1x49x512xf32) <- (-1x49x16x32xf32, 3xi64) + reshape_43 = paddle._C_ops.reshape(transpose_29, stack_39) + del stack_39 + + # pd_op.matmul: (-1x49x512xf32) <- (-1x49x512xf32, 512x512xf32) + matmul_24 = paddle._C_ops.matmul(reshape_43, parameter_242, False, False) + del parameter_242 + + # pd_op.add: (-1x49x512xf32) <- (-1x49x512xf32, 512xf32) + add_30 = paddle._C_ops.add(matmul_24, parameter_241) + del parameter_241 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x49x512xf32, 4xi64) + reshape_44 = paddle._C_ops.reshape(add_30, full_int_array_38) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_40 = [-1, 2, 2, 7, 7, 512] + + # pd_op.reshape: (-1x2x2x7x7x512xf32) <- (-1x7x7x512xf32, 6xi64) + reshape_240 = paddle._C_ops.reshape(reshape_44, full_int_array_40) + + # pd_op.transpose: (-1x2x7x2x7x512xf32) <- (-1x2x2x7x7x512xf32) + transpose_30 = paddle._C_ops.transpose(reshape_240, [0, 1, 3, 2, 4, 5]) + del reshape_240 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_41 = [-1, 14, 14, 512] + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x2x7x2x7x512xf32, 4xi64) + reshape_45 = paddle._C_ops.reshape(transpose_30, full_int_array_41) + + # pd_op.full: (xi64) <- () + full_56 = paddle._C_ops.full( + [], float("196"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_42 = [slice_59, full_56, full_44] + del slice_59 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_40 = paddle._C_ops.stack(combine_42, 0) + del combine_42 + + # pd_op.reshape: (-1x196x512xf32) <- (-1x14x14x512xf32, 3xi64) + reshape_46 = paddle._C_ops.reshape(reshape_45, stack_40) + del stack_40 + + # pd_op.full: (xf32) <- () + full_5 = paddle._C_ops.full( + [], + float("0.913043"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_64 = full_5 + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_31 = paddle._C_ops.shape64(reshape_46) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_65 = paddle._C_ops.slice( + shape64_31, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_31 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_43 = [slice_65, full_40, full_40] + del slice_65 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_41 = paddle._C_ops.stack(combine_43, 0) + del combine_43 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_6 = paddle._C_ops.uniform( + stack_41, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_41 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_179 = paddle._C_ops.add(full_5, uniform_6) + del uniform_6 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_6 = paddle._C_ops.floor(add_179) + del add_179 + + # pd_op.divide: (-1x196x512xf32) <- (-1x196x512xf32, xf32) + divide_6 = paddle._C_ops.divide(reshape_46, full_5) + + # pd_op.multiply: (-1x196x512xf32) <- (-1x196x512xf32, -1x1x1xf32) + multiply_6 = paddle._C_ops.multiply(divide_6, floor_6) + + # pd_op.add: (-1x196x512xf32) <- (-1x-1x512xf32, -1x196x512xf32) + add_31 = paddle._C_ops.add(matmul_21, multiply_6) + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_36, layer_norm_37, layer_norm_38 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_31, parameter_240, parameter_239, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_239, parameter_240 + + # pd_op.matmul: (-1x196x2048xf32) <- (-1x196x512xf32, 512x2048xf32) + matmul_25 = paddle._C_ops.matmul(layer_norm_36, parameter_238, False, False) + del parameter_238 + + # pd_op.add: (-1x196x2048xf32) <- (-1x196x2048xf32, 2048xf32) + add_32 = paddle._C_ops.add(matmul_25, parameter_237) + del parameter_237 + + # pd_op.gelu: (-1x196x2048xf32) <- (-1x196x2048xf32) + gelu_4 = paddle._C_ops.gelu(add_32, False) + + # pd_op.matmul: (-1x196x512xf32) <- (-1x196x2048xf32, 2048x512xf32) + matmul_26 = paddle._C_ops.matmul(gelu_4, parameter_236, False, False) + del parameter_236 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, 512xf32) + add_33 = paddle._C_ops.add(matmul_26, parameter_235) + del parameter_235 + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_32 = paddle._C_ops.shape64(add_33) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_66 = paddle._C_ops.slice( + shape64_32, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_32 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_44 = [slice_66, full_40, full_40] + del slice_66 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_42 = paddle._C_ops.stack(combine_44, 0) + del combine_44 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_7 = paddle._C_ops.uniform( + stack_42, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_42 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_180 = paddle._C_ops.add(full_5, uniform_7) + del uniform_7 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_7 = paddle._C_ops.floor(add_180) + del add_180 + + # pd_op.divide: (-1x196x512xf32) <- (-1x196x512xf32, xf32) + divide_7 = paddle._C_ops.divide(add_33, full_5) + + # pd_op.multiply: (-1x196x512xf32) <- (-1x196x512xf32, -1x1x1xf32) + multiply_7 = paddle._C_ops.multiply(divide_7, floor_7) + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_34 = paddle._C_ops.add(add_31, multiply_7) + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_33 = paddle._C_ops.shape64(add_34) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_67 = paddle._C_ops.slice( + shape64_33, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_33 + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_39, layer_norm_40, layer_norm_41 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_34, parameter_234, parameter_233, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_233, parameter_234 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_45 = [slice_67, full_54, full_54, full_44] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_43 = paddle._C_ops.stack(combine_45, 0) + del combine_45 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x196x512xf32, 4xi64) + reshape_47 = paddle._C_ops.reshape(layer_norm_39, stack_43) + del stack_43 + + # pd_op.shape64: (4xi64) <- (-1x14x14x512xf32) + shape64_34 = paddle._C_ops.shape64(reshape_47) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_68 = paddle._C_ops.slice( + shape64_34, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_34 + + # pd_op.roll: (-1x14x14x512xf32) <- (-1x14x14x512xf32, 2xi64) + roll_4 = paddle._C_ops.roll(reshape_47, full_int_array_2, [1, 2]) + + # pd_op.shape64: (4xi64) <- (-1x14x14x512xf32) + shape64_35 = paddle._C_ops.shape64(roll_4) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_69 = paddle._C_ops.slice( + shape64_35, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_35 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_46 = [slice_69, full_55, full_28, full_55, full_28, full_44] + del slice_69 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_44 = paddle._C_ops.stack(combine_46, 0) + del combine_46 + + # pd_op.reshape: (-1x2x7x2x7x512xf32) <- (-1x14x14x512xf32, 6xi64) + reshape_241 = paddle._C_ops.reshape(roll_4, stack_44) + del stack_44 + + # pd_op.transpose: (-1x2x2x7x7x512xf32) <- (-1x2x7x2x7x512xf32) + transpose_31 = paddle._C_ops.transpose(reshape_241, [0, 1, 3, 2, 4, 5]) + del reshape_241 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x2x2x7x7x512xf32, 4xi64) + reshape_48 = paddle._C_ops.reshape(transpose_31, full_int_array_38) + + # pd_op.reshape: (-1x49x512xf32) <- (-1x7x7x512xf32, 3xi64) + reshape_49 = paddle._C_ops.reshape(reshape_48, full_int_array_39) + + # pd_op.full: (1x14x14x1xf32) <- () + full_57 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__28 = paddle._C_ops.set_value_( + full_57, + full_int_array_16, + full_int_array_17, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_57 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__29 = paddle._C_ops.set_value_( + set_value__28, + full_int_array_19, + full_int_array_20, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__28 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__30 = paddle._C_ops.set_value_( + set_value__29, + full_int_array_21, + full_int_array_22, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__29 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__31 = paddle._C_ops.set_value_( + set_value__30, + full_int_array_23, + full_int_array_24, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__30 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__32 = paddle._C_ops.set_value_( + set_value__31, + full_int_array_17, + full_int_array_2, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__31 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__33 = paddle._C_ops.set_value_( + set_value__32, + full_int_array_20, + full_int_array_25, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__32 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__34 = paddle._C_ops.set_value_( + set_value__33, + full_int_array_26, + full_int_array_27, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__33 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__35 = paddle._C_ops.set_value_( + set_value__34, + full_int_array_24, + full_int_array_28, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__34 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__2 = paddle._C_ops.set_value_( + set_value__35, + full_int_array_2, + full_int_array_29, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__35 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_42 = [1, 2, 7, 2, 7, 1] + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_242 = paddle._C_ops.reshape(set_value__2, full_int_array_42) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_148 = paddle._C_ops.transpose(reshape_242, [0, 1, 3, 2, 4, 5]) + del reshape_242 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_243 = paddle._C_ops.reshape(transpose_148, full_int_array_31) + del transpose_148 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_244 = paddle._C_ops.reshape(reshape_243, full_int_array_32) + del reshape_243 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_43 = paddle._C_ops.unsqueeze(reshape_244, full_int_array_8) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_44 = paddle._C_ops.unsqueeze(reshape_244, full_int_array_0) + del reshape_244 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_2 = paddle._C_ops.subtract(unsqueeze_43, unsqueeze_44) + del unsqueeze_43, unsqueeze_44 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_2 = paddle._C_ops.not_equal(subtract_2, full_35) + + # pd_op.full: (4x49x49xf32) <- () + full_58 = paddle._C_ops.full( + [4, 49, 49], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_4 = paddle._C_ops.where(not_equal_2, full_58, subtract_2) + del not_equal_2, subtract_2 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_2 = paddle._C_ops.equal(where_4, full_35) + + # pd_op.full: (4x49x49xf32) <- () + full_59 = paddle._C_ops.full( + [4, 49, 49], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_5 = paddle._C_ops.where(equal_2, full_59, where_4) + del equal_2, where_4 + + # pd_op.shape64: (3xi64) <- (-1x49x512xf32) + shape64_36 = paddle._C_ops.shape64(reshape_49) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_70 = paddle._C_ops.slice( + shape64_36, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_36 + + # pd_op.matmul: (-1x49x1536xf32) <- (-1x49x512xf32, 512x1536xf32) + matmul_27 = paddle._C_ops.matmul(reshape_49, parameter_232, False, False) + del parameter_232 + + # pd_op.add: (-1x49x1536xf32) <- (-1x49x1536xf32, 1536xf32) + add_35 = paddle._C_ops.add(matmul_27, parameter_231) + del parameter_231 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_47 = [slice_70, full_29, full_30, full_52, full_32] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_45 = paddle._C_ops.stack(combine_47, 0) + del combine_47 + + # pd_op.reshape: (-1x49x3x16x32xf32) <- (-1x49x1536xf32, 5xi64) + reshape_245 = paddle._C_ops.reshape(add_35, stack_45) + del stack_45 + + # pd_op.transpose: (3x-1x16x49x32xf32) <- (-1x49x3x16x32xf32) + transpose_32 = paddle._C_ops.transpose(reshape_245, [2, 0, 3, 1, 4]) + del reshape_245 + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_71 = paddle._C_ops.slice( + transpose_32, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_72 = paddle._C_ops.slice( + transpose_32, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_5 = paddle._C_ops.slice( + transpose_32, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x16x49x32xf32) <- (-1x16x49x32xf32, 1xf32) + scale_5 = paddle._C_ops.scale(slice_71, full_0, float("0"), True) + del slice_71 + + # pd_op.transpose: (-1x16x32x49xf32) <- (-1x16x49x32xf32) + transpose_33 = paddle._C_ops.transpose(slice_72, [0, 1, 3, 2]) + del slice_72 + + # pd_op.matmul: (-1x16x49x49xf32) <- (-1x16x49x32xf32, -1x16x32x49xf32) + matmul_28 = paddle._C_ops.matmul(scale_5, transpose_33, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_50 = paddle._C_ops.reshape(data_11, full_int_array_12) + del data_11 + + # pd_op.index_select: (2401x16xf32) <- (169x16xf32, 2401xi64) + index_select_5 = paddle._C_ops.index_select(data_12, reshape_50, 0) + del data_12 + + # pd_op.reshape: (49x49x16xf32) <- (2401x16xf32, 3xi64) + reshape_246 = paddle._C_ops.reshape(index_select_5, full_int_array_13) + + # pd_op.transpose: (16x49x49xf32) <- (49x49x16xf32) + transpose_34 = paddle._C_ops.transpose(reshape_246, [2, 0, 1]) + del reshape_246 + + # pd_op.unsqueeze: (1x16x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_7 = paddle._C_ops.unsqueeze(transpose_34, full_int_array_7) + + # pd_op.add: (-1x16x49x49xf32) <- (-1x16x49x49xf32, 1x16x49x49xf32) + add_36 = paddle._C_ops.add(matmul_28, unsqueeze_7) + + # pd_op.full: (xi64) <- () + full_60 = paddle._C_ops.full( + [], float("4"), paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_2 = paddle._C_ops.floor_divide(slice_70, full_60) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_48 = [floor_divide_2, full_31, full_52, full_29, full_29] + del floor_divide_2 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_46 = paddle._C_ops.stack(combine_48, 0) + del combine_48 + + # pd_op.reshape: (-1x4x16x49x49xf32) <- (-1x16x49x49xf32, 5xi64) + reshape_51 = paddle._C_ops.reshape(add_36, stack_46) + del stack_46 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_45 = paddle._C_ops.unsqueeze(where_5, full_int_array_8) + del where_5 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_8 = paddle._C_ops.unsqueeze(unsqueeze_45, full_int_array_7) + del unsqueeze_45 + + # pd_op.add: (-1x4x16x49x49xf32) <- (-1x4x16x49x49xf32, 1x4x1x49x49xf32) + add_37 = paddle._C_ops.add(reshape_51, unsqueeze_8) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_49 = [slice_70, full_52, full_29, full_29] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_47 = paddle._C_ops.stack(combine_49, 0) + del combine_49 + + # pd_op.reshape: (-1x16x49x49xf32) <- (-1x4x16x49x49xf32, 4xi64) + reshape_247 = paddle._C_ops.reshape(add_37, stack_47) + del stack_47 + + # pd_op.softmax: (-1x16x49x49xf32) <- (-1x16x49x49xf32) + softmax_5 = paddle._C_ops.softmax(reshape_247, -1) + del reshape_247 + + # pd_op.matmul: (-1x16x49x32xf32) <- (-1x16x49x49xf32, -1x16x49x32xf32) + matmul_129 = paddle._C_ops.matmul(softmax_5, slice_5, False, False) + + # pd_op.transpose: (-1x49x16x32xf32) <- (-1x16x49x32xf32) + transpose_35 = paddle._C_ops.transpose(matmul_129, [0, 2, 1, 3]) + del matmul_129 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_50 = [slice_70, full_29, full_44] + del slice_70 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_48 = paddle._C_ops.stack(combine_50, 0) + del combine_50 + + # pd_op.reshape: (-1x49x512xf32) <- (-1x49x16x32xf32, 3xi64) + reshape_52 = paddle._C_ops.reshape(transpose_35, stack_48) + del stack_48 + + # pd_op.matmul: (-1x49x512xf32) <- (-1x49x512xf32, 512x512xf32) + matmul_29 = paddle._C_ops.matmul(reshape_52, parameter_230, False, False) + del parameter_230 + + # pd_op.add: (-1x49x512xf32) <- (-1x49x512xf32, 512xf32) + add_38 = paddle._C_ops.add(matmul_29, parameter_229) + del parameter_229 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x49x512xf32, 4xi64) + reshape_53 = paddle._C_ops.reshape(add_38, full_int_array_38) + + # pd_op.reshape: (-1x2x2x7x7x512xf32) <- (-1x7x7x512xf32, 6xi64) + reshape_248 = paddle._C_ops.reshape(reshape_53, full_int_array_40) + + # pd_op.transpose: (-1x2x7x2x7x512xf32) <- (-1x2x2x7x7x512xf32) + transpose_36 = paddle._C_ops.transpose(reshape_248, [0, 1, 3, 2, 4, 5]) + del reshape_248 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x2x7x2x7x512xf32, 4xi64) + reshape_54 = paddle._C_ops.reshape(transpose_36, full_int_array_41) + + # pd_op.roll: (-1x14x14x512xf32) <- (-1x14x14x512xf32, 2xi64) + roll_5 = paddle._C_ops.roll(reshape_54, full_int_array_3, [1, 2]) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_51 = [slice_67, full_56, full_44] + del slice_67 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_49 = paddle._C_ops.stack(combine_51, 0) + del combine_51 + + # pd_op.reshape: (-1x196x512xf32) <- (-1x14x14x512xf32, 3xi64) + reshape_55 = paddle._C_ops.reshape(roll_5, stack_49) + del stack_49 + + # pd_op.full: (xf32) <- () + full_6 = paddle._C_ops.full( + [], + float("0.891304"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_75 = full_6 + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_37 = paddle._C_ops.shape64(reshape_55) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_73 = paddle._C_ops.slice( + shape64_37, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_37 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_52 = [slice_73, full_40, full_40] + del slice_73 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_50 = paddle._C_ops.stack(combine_52, 0) + del combine_52 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_8 = paddle._C_ops.uniform( + stack_50, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_50 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_181 = paddle._C_ops.add(full_6, uniform_8) + del uniform_8 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_8 = paddle._C_ops.floor(add_181) + del add_181 + + # pd_op.divide: (-1x196x512xf32) <- (-1x196x512xf32, xf32) + divide_8 = paddle._C_ops.divide(reshape_55, full_6) + + # pd_op.multiply: (-1x196x512xf32) <- (-1x196x512xf32, -1x1x1xf32) + multiply_8 = paddle._C_ops.multiply(divide_8, floor_8) + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_39 = paddle._C_ops.add(add_34, multiply_8) + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_42, layer_norm_43, layer_norm_44 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_39, parameter_228, parameter_227, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_227, parameter_228 + + # pd_op.matmul: (-1x196x2048xf32) <- (-1x196x512xf32, 512x2048xf32) + matmul_30 = paddle._C_ops.matmul(layer_norm_42, parameter_226, False, False) + del parameter_226 + + # pd_op.add: (-1x196x2048xf32) <- (-1x196x2048xf32, 2048xf32) + add_40 = paddle._C_ops.add(matmul_30, parameter_225) + del parameter_225 + + # pd_op.gelu: (-1x196x2048xf32) <- (-1x196x2048xf32) + gelu_5 = paddle._C_ops.gelu(add_40, False) + + # pd_op.matmul: (-1x196x512xf32) <- (-1x196x2048xf32, 2048x512xf32) + matmul_31 = paddle._C_ops.matmul(gelu_5, parameter_224, False, False) + del parameter_224 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, 512xf32) + add_41 = paddle._C_ops.add(matmul_31, parameter_223) + del parameter_223 + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_38 = paddle._C_ops.shape64(add_41) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_74 = paddle._C_ops.slice( + shape64_38, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_38 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_53 = [slice_74, full_40, full_40] + del slice_74 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_51 = paddle._C_ops.stack(combine_53, 0) + del combine_53 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_9 = paddle._C_ops.uniform( + stack_51, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_51 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_182 = paddle._C_ops.add(full_6, uniform_9) + del uniform_9 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_9 = paddle._C_ops.floor(add_182) + del add_182 + + # pd_op.divide: (-1x196x512xf32) <- (-1x196x512xf32, xf32) + divide_9 = paddle._C_ops.divide(add_41, full_6) + + # pd_op.multiply: (-1x196x512xf32) <- (-1x196x512xf32, -1x1x1xf32) + multiply_9 = paddle._C_ops.multiply(divide_9, floor_9) + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_42 = paddle._C_ops.add(add_39, multiply_9) + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_39 = paddle._C_ops.shape64(add_42) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_75 = paddle._C_ops.slice( + shape64_39, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_39 + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_45, layer_norm_46, layer_norm_47 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_42, parameter_222, parameter_221, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_221, parameter_222 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_54 = [slice_75, full_54, full_54, full_44] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_52 = paddle._C_ops.stack(combine_54, 0) + del combine_54 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x196x512xf32, 4xi64) + reshape_56 = paddle._C_ops.reshape(layer_norm_45, stack_52) + del stack_52 + + # pd_op.shape64: (4xi64) <- (-1x14x14x512xf32) + shape64_40 = paddle._C_ops.shape64(reshape_56) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_76 = paddle._C_ops.slice( + shape64_40, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_40 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_55 = [slice_76, full_55, full_28, full_55, full_28, full_44] + del slice_76 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_53 = paddle._C_ops.stack(combine_55, 0) + del combine_55 + + # pd_op.reshape: (-1x2x7x2x7x512xf32) <- (-1x14x14x512xf32, 6xi64) + reshape_249 = paddle._C_ops.reshape(reshape_56, stack_53) + del stack_53 + + # pd_op.transpose: (-1x2x2x7x7x512xf32) <- (-1x2x7x2x7x512xf32) + transpose_37 = paddle._C_ops.transpose(reshape_249, [0, 1, 3, 2, 4, 5]) + del reshape_249 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x2x2x7x7x512xf32, 4xi64) + reshape_57 = paddle._C_ops.reshape(transpose_37, full_int_array_38) + + # pd_op.reshape: (-1x49x512xf32) <- (-1x7x7x512xf32, 3xi64) + reshape_58 = paddle._C_ops.reshape(reshape_57, full_int_array_39) + + # pd_op.shape64: (3xi64) <- (-1x49x512xf32) + shape64_41 = paddle._C_ops.shape64(reshape_58) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_77 = paddle._C_ops.slice( + shape64_41, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_41 + + # pd_op.matmul: (-1x49x1536xf32) <- (-1x49x512xf32, 512x1536xf32) + matmul_32 = paddle._C_ops.matmul(reshape_58, parameter_220, False, False) + del parameter_220 + + # pd_op.add: (-1x49x1536xf32) <- (-1x49x1536xf32, 1536xf32) + add_43 = paddle._C_ops.add(matmul_32, parameter_219) + del parameter_219 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_56 = [slice_77, full_29, full_30, full_52, full_32] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_54 = paddle._C_ops.stack(combine_56, 0) + del combine_56 + + # pd_op.reshape: (-1x49x3x16x32xf32) <- (-1x49x1536xf32, 5xi64) + reshape_250 = paddle._C_ops.reshape(add_43, stack_54) + del stack_54 + + # pd_op.transpose: (3x-1x16x49x32xf32) <- (-1x49x3x16x32xf32) + transpose_38 = paddle._C_ops.transpose(reshape_250, [2, 0, 3, 1, 4]) + del reshape_250 + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_78 = paddle._C_ops.slice( + transpose_38, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_79 = paddle._C_ops.slice( + transpose_38, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_6 = paddle._C_ops.slice( + transpose_38, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x16x49x32xf32) <- (-1x16x49x32xf32, 1xf32) + scale_6 = paddle._C_ops.scale(slice_78, full_0, float("0"), True) + del slice_78 + + # pd_op.transpose: (-1x16x32x49xf32) <- (-1x16x49x32xf32) + transpose_39 = paddle._C_ops.transpose(slice_79, [0, 1, 3, 2]) + del slice_79 + + # pd_op.matmul: (-1x16x49x49xf32) <- (-1x16x49x32xf32, -1x16x32x49xf32) + matmul_33 = paddle._C_ops.matmul(scale_6, transpose_39, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_59 = paddle._C_ops.reshape(data_13, full_int_array_12) + del data_13 + + # pd_op.index_select: (2401x16xf32) <- (169x16xf32, 2401xi64) + index_select_6 = paddle._C_ops.index_select(data_14, reshape_59, 0) + del data_14 + + # pd_op.reshape: (49x49x16xf32) <- (2401x16xf32, 3xi64) + reshape_251 = paddle._C_ops.reshape(index_select_6, full_int_array_13) + + # pd_op.transpose: (16x49x49xf32) <- (49x49x16xf32) + transpose_40 = paddle._C_ops.transpose(reshape_251, [2, 0, 1]) + del reshape_251 + + # pd_op.unsqueeze: (1x16x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_9 = paddle._C_ops.unsqueeze(transpose_40, full_int_array_7) + + # pd_op.add: (-1x16x49x49xf32) <- (-1x16x49x49xf32, 1x16x49x49xf32) + add_183 = paddle._C_ops.add(matmul_33, unsqueeze_9) + + # pd_op.softmax: (-1x16x49x49xf32) <- (-1x16x49x49xf32) + softmax_6 = paddle._C_ops.softmax(add_183, -1) + del add_183 + + # pd_op.matmul: (-1x16x49x32xf32) <- (-1x16x49x49xf32, -1x16x49x32xf32) + matmul_130 = paddle._C_ops.matmul(softmax_6, slice_6, False, False) + + # pd_op.transpose: (-1x49x16x32xf32) <- (-1x16x49x32xf32) + transpose_41 = paddle._C_ops.transpose(matmul_130, [0, 2, 1, 3]) + del matmul_130 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_57 = [slice_77, full_29, full_44] + del slice_77 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_55 = paddle._C_ops.stack(combine_57, 0) + del combine_57 + + # pd_op.reshape: (-1x49x512xf32) <- (-1x49x16x32xf32, 3xi64) + reshape_60 = paddle._C_ops.reshape(transpose_41, stack_55) + del stack_55 + + # pd_op.matmul: (-1x49x512xf32) <- (-1x49x512xf32, 512x512xf32) + matmul_34 = paddle._C_ops.matmul(reshape_60, parameter_218, False, False) + del parameter_218 + + # pd_op.add: (-1x49x512xf32) <- (-1x49x512xf32, 512xf32) + add_44 = paddle._C_ops.add(matmul_34, parameter_217) + del parameter_217 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x49x512xf32, 4xi64) + reshape_61 = paddle._C_ops.reshape(add_44, full_int_array_38) + + # pd_op.reshape: (-1x2x2x7x7x512xf32) <- (-1x7x7x512xf32, 6xi64) + reshape_252 = paddle._C_ops.reshape(reshape_61, full_int_array_40) + + # pd_op.transpose: (-1x2x7x2x7x512xf32) <- (-1x2x2x7x7x512xf32) + transpose_42 = paddle._C_ops.transpose(reshape_252, [0, 1, 3, 2, 4, 5]) + del reshape_252 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x2x7x2x7x512xf32, 4xi64) + reshape_62 = paddle._C_ops.reshape(transpose_42, full_int_array_41) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_58 = [slice_75, full_56, full_44] + del slice_75 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_56 = paddle._C_ops.stack(combine_58, 0) + del combine_58 + + # pd_op.reshape: (-1x196x512xf32) <- (-1x14x14x512xf32, 3xi64) + reshape_63 = paddle._C_ops.reshape(reshape_62, stack_56) + del stack_56 + + # pd_op.full: (xf32) <- () + full_7 = paddle._C_ops.full( + [], + float("0.869565"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_84 = full_7 + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_42 = paddle._C_ops.shape64(reshape_63) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_80 = paddle._C_ops.slice( + shape64_42, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_42 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_59 = [slice_80, full_40, full_40] + del slice_80 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_57 = paddle._C_ops.stack(combine_59, 0) + del combine_59 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_10 = paddle._C_ops.uniform( + stack_57, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_57 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_184 = paddle._C_ops.add(full_7, uniform_10) + del uniform_10 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_10 = paddle._C_ops.floor(add_184) + del add_184 + + # pd_op.divide: (-1x196x512xf32) <- (-1x196x512xf32, xf32) + divide_10 = paddle._C_ops.divide(reshape_63, full_7) + + # pd_op.multiply: (-1x196x512xf32) <- (-1x196x512xf32, -1x1x1xf32) + multiply_10 = paddle._C_ops.multiply(divide_10, floor_10) + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_45 = paddle._C_ops.add(add_42, multiply_10) + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_48, layer_norm_49, layer_norm_50 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_45, parameter_216, parameter_215, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_215, parameter_216 + + # pd_op.matmul: (-1x196x2048xf32) <- (-1x196x512xf32, 512x2048xf32) + matmul_35 = paddle._C_ops.matmul(layer_norm_48, parameter_214, False, False) + del parameter_214 + + # pd_op.add: (-1x196x2048xf32) <- (-1x196x2048xf32, 2048xf32) + add_46 = paddle._C_ops.add(matmul_35, parameter_213) + del parameter_213 + + # pd_op.gelu: (-1x196x2048xf32) <- (-1x196x2048xf32) + gelu_6 = paddle._C_ops.gelu(add_46, False) + + # pd_op.matmul: (-1x196x512xf32) <- (-1x196x2048xf32, 2048x512xf32) + matmul_36 = paddle._C_ops.matmul(gelu_6, parameter_212, False, False) + del parameter_212 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, 512xf32) + add_47 = paddle._C_ops.add(matmul_36, parameter_211) + del parameter_211 + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_43 = paddle._C_ops.shape64(add_47) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_81 = paddle._C_ops.slice( + shape64_43, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_43 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_60 = [slice_81, full_40, full_40] + del slice_81 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_58 = paddle._C_ops.stack(combine_60, 0) + del combine_60 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_11 = paddle._C_ops.uniform( + stack_58, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_58 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_185 = paddle._C_ops.add(full_7, uniform_11) + del uniform_11 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_11 = paddle._C_ops.floor(add_185) + del add_185 + + # pd_op.divide: (-1x196x512xf32) <- (-1x196x512xf32, xf32) + divide_11 = paddle._C_ops.divide(add_47, full_7) + + # pd_op.multiply: (-1x196x512xf32) <- (-1x196x512xf32, -1x1x1xf32) + multiply_11 = paddle._C_ops.multiply(divide_11, floor_11) + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_48 = paddle._C_ops.add(add_45, multiply_11) + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_44 = paddle._C_ops.shape64(add_48) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_82 = paddle._C_ops.slice( + shape64_44, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_44 + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_51, layer_norm_52, layer_norm_53 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_48, parameter_210, parameter_209, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_209, parameter_210 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_61 = [slice_82, full_54, full_54, full_44] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_59 = paddle._C_ops.stack(combine_61, 0) + del combine_61 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x196x512xf32, 4xi64) + reshape_64 = paddle._C_ops.reshape(layer_norm_51, stack_59) + del stack_59 + + # pd_op.shape64: (4xi64) <- (-1x14x14x512xf32) + shape64_45 = paddle._C_ops.shape64(reshape_64) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_83 = paddle._C_ops.slice( + shape64_45, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_45 + + # pd_op.roll: (-1x14x14x512xf32) <- (-1x14x14x512xf32, 2xi64) + roll_6 = paddle._C_ops.roll(reshape_64, full_int_array_2, [1, 2]) + + # pd_op.shape64: (4xi64) <- (-1x14x14x512xf32) + shape64_46 = paddle._C_ops.shape64(roll_6) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_84 = paddle._C_ops.slice( + shape64_46, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_46 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_62 = [slice_84, full_55, full_28, full_55, full_28, full_44] + del slice_84 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_60 = paddle._C_ops.stack(combine_62, 0) + del combine_62 + + # pd_op.reshape: (-1x2x7x2x7x512xf32) <- (-1x14x14x512xf32, 6xi64) + reshape_253 = paddle._C_ops.reshape(roll_6, stack_60) + del stack_60 + + # pd_op.transpose: (-1x2x2x7x7x512xf32) <- (-1x2x7x2x7x512xf32) + transpose_43 = paddle._C_ops.transpose(reshape_253, [0, 1, 3, 2, 4, 5]) + del reshape_253 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x2x2x7x7x512xf32, 4xi64) + reshape_65 = paddle._C_ops.reshape(transpose_43, full_int_array_38) + + # pd_op.reshape: (-1x49x512xf32) <- (-1x7x7x512xf32, 3xi64) + reshape_66 = paddle._C_ops.reshape(reshape_65, full_int_array_39) + + # pd_op.full: (1x14x14x1xf32) <- () + full_61 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__36 = paddle._C_ops.set_value_( + full_61, + full_int_array_16, + full_int_array_17, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_61 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__37 = paddle._C_ops.set_value_( + set_value__36, + full_int_array_19, + full_int_array_20, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__36 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__38 = paddle._C_ops.set_value_( + set_value__37, + full_int_array_21, + full_int_array_22, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__37 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__39 = paddle._C_ops.set_value_( + set_value__38, + full_int_array_23, + full_int_array_24, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__38 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__40 = paddle._C_ops.set_value_( + set_value__39, + full_int_array_17, + full_int_array_2, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__39 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__41 = paddle._C_ops.set_value_( + set_value__40, + full_int_array_20, + full_int_array_25, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__40 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__42 = paddle._C_ops.set_value_( + set_value__41, + full_int_array_26, + full_int_array_27, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__41 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__43 = paddle._C_ops.set_value_( + set_value__42, + full_int_array_24, + full_int_array_28, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__42 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__3 = paddle._C_ops.set_value_( + set_value__43, + full_int_array_2, + full_int_array_29, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__43 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_254 = paddle._C_ops.reshape(set_value__3, full_int_array_42) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_149 = paddle._C_ops.transpose(reshape_254, [0, 1, 3, 2, 4, 5]) + del reshape_254 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_255 = paddle._C_ops.reshape(transpose_149, full_int_array_31) + del transpose_149 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_256 = paddle._C_ops.reshape(reshape_255, full_int_array_32) + del reshape_255 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_46 = paddle._C_ops.unsqueeze(reshape_256, full_int_array_8) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_47 = paddle._C_ops.unsqueeze(reshape_256, full_int_array_0) + del reshape_256 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_3 = paddle._C_ops.subtract(unsqueeze_46, unsqueeze_47) + del unsqueeze_46, unsqueeze_47 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_3 = paddle._C_ops.not_equal(subtract_3, full_35) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_6 = paddle._C_ops.where(not_equal_3, full_58, subtract_3) + del not_equal_3, subtract_3 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_3 = paddle._C_ops.equal(where_6, full_35) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_7 = paddle._C_ops.where(equal_3, full_59, where_6) + del equal_3, where_6 + + # pd_op.shape64: (3xi64) <- (-1x49x512xf32) + shape64_47 = paddle._C_ops.shape64(reshape_66) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_85 = paddle._C_ops.slice( + shape64_47, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_47 + + # pd_op.matmul: (-1x49x1536xf32) <- (-1x49x512xf32, 512x1536xf32) + matmul_37 = paddle._C_ops.matmul(reshape_66, parameter_208, False, False) + del parameter_208 + + # pd_op.add: (-1x49x1536xf32) <- (-1x49x1536xf32, 1536xf32) + add_49 = paddle._C_ops.add(matmul_37, parameter_207) + del parameter_207 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_63 = [slice_85, full_29, full_30, full_52, full_32] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_61 = paddle._C_ops.stack(combine_63, 0) + del combine_63 + + # pd_op.reshape: (-1x49x3x16x32xf32) <- (-1x49x1536xf32, 5xi64) + reshape_257 = paddle._C_ops.reshape(add_49, stack_61) + del stack_61 + + # pd_op.transpose: (3x-1x16x49x32xf32) <- (-1x49x3x16x32xf32) + transpose_44 = paddle._C_ops.transpose(reshape_257, [2, 0, 3, 1, 4]) + del reshape_257 + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_86 = paddle._C_ops.slice( + transpose_44, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_87 = paddle._C_ops.slice( + transpose_44, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_7 = paddle._C_ops.slice( + transpose_44, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x16x49x32xf32) <- (-1x16x49x32xf32, 1xf32) + scale_7 = paddle._C_ops.scale(slice_86, full_0, float("0"), True) + del slice_86 + + # pd_op.transpose: (-1x16x32x49xf32) <- (-1x16x49x32xf32) + transpose_45 = paddle._C_ops.transpose(slice_87, [0, 1, 3, 2]) + del slice_87 + + # pd_op.matmul: (-1x16x49x49xf32) <- (-1x16x49x32xf32, -1x16x32x49xf32) + matmul_38 = paddle._C_ops.matmul(scale_7, transpose_45, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_67 = paddle._C_ops.reshape(data_15, full_int_array_12) + del data_15 + + # pd_op.index_select: (2401x16xf32) <- (169x16xf32, 2401xi64) + index_select_7 = paddle._C_ops.index_select(data_16, reshape_67, 0) + del data_16 + + # pd_op.reshape: (49x49x16xf32) <- (2401x16xf32, 3xi64) + reshape_258 = paddle._C_ops.reshape(index_select_7, full_int_array_13) + + # pd_op.transpose: (16x49x49xf32) <- (49x49x16xf32) + transpose_46 = paddle._C_ops.transpose(reshape_258, [2, 0, 1]) + del reshape_258 + + # pd_op.unsqueeze: (1x16x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_10 = paddle._C_ops.unsqueeze(transpose_46, full_int_array_7) + + # pd_op.add: (-1x16x49x49xf32) <- (-1x16x49x49xf32, 1x16x49x49xf32) + add_50 = paddle._C_ops.add(matmul_38, unsqueeze_10) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_3 = paddle._C_ops.floor_divide(slice_85, full_60) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_64 = [floor_divide_3, full_31, full_52, full_29, full_29] + del floor_divide_3 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_62 = paddle._C_ops.stack(combine_64, 0) + del combine_64 + + # pd_op.reshape: (-1x4x16x49x49xf32) <- (-1x16x49x49xf32, 5xi64) + reshape_68 = paddle._C_ops.reshape(add_50, stack_62) + del stack_62 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_48 = paddle._C_ops.unsqueeze(where_7, full_int_array_8) + del where_7 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_11 = paddle._C_ops.unsqueeze(unsqueeze_48, full_int_array_7) + del unsqueeze_48 + + # pd_op.add: (-1x4x16x49x49xf32) <- (-1x4x16x49x49xf32, 1x4x1x49x49xf32) + add_51 = paddle._C_ops.add(reshape_68, unsqueeze_11) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_65 = [slice_85, full_52, full_29, full_29] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_63 = paddle._C_ops.stack(combine_65, 0) + del combine_65 + + # pd_op.reshape: (-1x16x49x49xf32) <- (-1x4x16x49x49xf32, 4xi64) + reshape_259 = paddle._C_ops.reshape(add_51, stack_63) + del stack_63 + + # pd_op.softmax: (-1x16x49x49xf32) <- (-1x16x49x49xf32) + softmax_7 = paddle._C_ops.softmax(reshape_259, -1) + del reshape_259 + + # pd_op.matmul: (-1x16x49x32xf32) <- (-1x16x49x49xf32, -1x16x49x32xf32) + matmul_131 = paddle._C_ops.matmul(softmax_7, slice_7, False, False) + + # pd_op.transpose: (-1x49x16x32xf32) <- (-1x16x49x32xf32) + transpose_47 = paddle._C_ops.transpose(matmul_131, [0, 2, 1, 3]) + del matmul_131 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_66 = [slice_85, full_29, full_44] + del slice_85 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_64 = paddle._C_ops.stack(combine_66, 0) + del combine_66 + + # pd_op.reshape: (-1x49x512xf32) <- (-1x49x16x32xf32, 3xi64) + reshape_69 = paddle._C_ops.reshape(transpose_47, stack_64) + del stack_64 + + # pd_op.matmul: (-1x49x512xf32) <- (-1x49x512xf32, 512x512xf32) + matmul_39 = paddle._C_ops.matmul(reshape_69, parameter_206, False, False) + del parameter_206 + + # pd_op.add: (-1x49x512xf32) <- (-1x49x512xf32, 512xf32) + add_52 = paddle._C_ops.add(matmul_39, parameter_205) + del parameter_205 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x49x512xf32, 4xi64) + reshape_70 = paddle._C_ops.reshape(add_52, full_int_array_38) + + # pd_op.reshape: (-1x2x2x7x7x512xf32) <- (-1x7x7x512xf32, 6xi64) + reshape_260 = paddle._C_ops.reshape(reshape_70, full_int_array_40) + + # pd_op.transpose: (-1x2x7x2x7x512xf32) <- (-1x2x2x7x7x512xf32) + transpose_48 = paddle._C_ops.transpose(reshape_260, [0, 1, 3, 2, 4, 5]) + del reshape_260 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x2x7x2x7x512xf32, 4xi64) + reshape_71 = paddle._C_ops.reshape(transpose_48, full_int_array_41) + + # pd_op.roll: (-1x14x14x512xf32) <- (-1x14x14x512xf32, 2xi64) + roll_7 = paddle._C_ops.roll(reshape_71, full_int_array_3, [1, 2]) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_67 = [slice_82, full_56, full_44] + del slice_82 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_65 = paddle._C_ops.stack(combine_67, 0) + del combine_67 + + # pd_op.reshape: (-1x196x512xf32) <- (-1x14x14x512xf32, 3xi64) + reshape_72 = paddle._C_ops.reshape(roll_7, stack_65) + del stack_65 + + # pd_op.full: (xf32) <- () + full_8 = paddle._C_ops.full( + [], + float("0.847826"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_95 = full_8 + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_48 = paddle._C_ops.shape64(reshape_72) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_88 = paddle._C_ops.slice( + shape64_48, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_48 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_68 = [slice_88, full_40, full_40] + del slice_88 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_66 = paddle._C_ops.stack(combine_68, 0) + del combine_68 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_12 = paddle._C_ops.uniform( + stack_66, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_66 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_186 = paddle._C_ops.add(full_8, uniform_12) + del uniform_12 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_12 = paddle._C_ops.floor(add_186) + del add_186 + + # pd_op.divide: (-1x196x512xf32) <- (-1x196x512xf32, xf32) + divide_12 = paddle._C_ops.divide(reshape_72, full_8) + + # pd_op.multiply: (-1x196x512xf32) <- (-1x196x512xf32, -1x1x1xf32) + multiply_12 = paddle._C_ops.multiply(divide_12, floor_12) + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_53 = paddle._C_ops.add(add_48, multiply_12) + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_54, layer_norm_55, layer_norm_56 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_53, parameter_204, parameter_203, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_203, parameter_204 + + # pd_op.matmul: (-1x196x2048xf32) <- (-1x196x512xf32, 512x2048xf32) + matmul_40 = paddle._C_ops.matmul(layer_norm_54, parameter_202, False, False) + del parameter_202 + + # pd_op.add: (-1x196x2048xf32) <- (-1x196x2048xf32, 2048xf32) + add_54 = paddle._C_ops.add(matmul_40, parameter_201) + del parameter_201 + + # pd_op.gelu: (-1x196x2048xf32) <- (-1x196x2048xf32) + gelu_7 = paddle._C_ops.gelu(add_54, False) + + # pd_op.matmul: (-1x196x512xf32) <- (-1x196x2048xf32, 2048x512xf32) + matmul_41 = paddle._C_ops.matmul(gelu_7, parameter_200, False, False) + del parameter_200 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, 512xf32) + add_55 = paddle._C_ops.add(matmul_41, parameter_199) + del parameter_199 + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_49 = paddle._C_ops.shape64(add_55) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_89 = paddle._C_ops.slice( + shape64_49, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_49 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_69 = [slice_89, full_40, full_40] + del slice_89 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_67 = paddle._C_ops.stack(combine_69, 0) + del combine_69 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_13 = paddle._C_ops.uniform( + stack_67, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_67 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_187 = paddle._C_ops.add(full_8, uniform_13) + del uniform_13 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_13 = paddle._C_ops.floor(add_187) + del add_187 + + # pd_op.divide: (-1x196x512xf32) <- (-1x196x512xf32, xf32) + divide_13 = paddle._C_ops.divide(add_55, full_8) + + # pd_op.multiply: (-1x196x512xf32) <- (-1x196x512xf32, -1x1x1xf32) + multiply_13 = paddle._C_ops.multiply(divide_13, floor_13) + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_56 = paddle._C_ops.add(add_53, multiply_13) + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_50 = paddle._C_ops.shape64(add_56) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_90 = paddle._C_ops.slice( + shape64_50, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_50 + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_57, layer_norm_58, layer_norm_59 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_56, parameter_198, parameter_197, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_197, parameter_198 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_70 = [slice_90, full_54, full_54, full_44] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_68 = paddle._C_ops.stack(combine_70, 0) + del combine_70 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x196x512xf32, 4xi64) + reshape_73 = paddle._C_ops.reshape(layer_norm_57, stack_68) + del stack_68 + + # pd_op.shape64: (4xi64) <- (-1x14x14x512xf32) + shape64_51 = paddle._C_ops.shape64(reshape_73) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_91 = paddle._C_ops.slice( + shape64_51, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_51 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_71 = [slice_91, full_55, full_28, full_55, full_28, full_44] + del slice_91 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_69 = paddle._C_ops.stack(combine_71, 0) + del combine_71 + + # pd_op.reshape: (-1x2x7x2x7x512xf32) <- (-1x14x14x512xf32, 6xi64) + reshape_261 = paddle._C_ops.reshape(reshape_73, stack_69) + del stack_69 + + # pd_op.transpose: (-1x2x2x7x7x512xf32) <- (-1x2x7x2x7x512xf32) + transpose_49 = paddle._C_ops.transpose(reshape_261, [0, 1, 3, 2, 4, 5]) + del reshape_261 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x2x2x7x7x512xf32, 4xi64) + reshape_74 = paddle._C_ops.reshape(transpose_49, full_int_array_38) + + # pd_op.reshape: (-1x49x512xf32) <- (-1x7x7x512xf32, 3xi64) + reshape_75 = paddle._C_ops.reshape(reshape_74, full_int_array_39) + + # pd_op.shape64: (3xi64) <- (-1x49x512xf32) + shape64_52 = paddle._C_ops.shape64(reshape_75) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_92 = paddle._C_ops.slice( + shape64_52, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_52 + + # pd_op.matmul: (-1x49x1536xf32) <- (-1x49x512xf32, 512x1536xf32) + matmul_42 = paddle._C_ops.matmul(reshape_75, parameter_196, False, False) + del parameter_196 + + # pd_op.add: (-1x49x1536xf32) <- (-1x49x1536xf32, 1536xf32) + add_57 = paddle._C_ops.add(matmul_42, parameter_195) + del parameter_195 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_72 = [slice_92, full_29, full_30, full_52, full_32] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_70 = paddle._C_ops.stack(combine_72, 0) + del combine_72 + + # pd_op.reshape: (-1x49x3x16x32xf32) <- (-1x49x1536xf32, 5xi64) + reshape_262 = paddle._C_ops.reshape(add_57, stack_70) + del stack_70 + + # pd_op.transpose: (3x-1x16x49x32xf32) <- (-1x49x3x16x32xf32) + transpose_50 = paddle._C_ops.transpose(reshape_262, [2, 0, 3, 1, 4]) + del reshape_262 + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_93 = paddle._C_ops.slice( + transpose_50, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_94 = paddle._C_ops.slice( + transpose_50, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_8 = paddle._C_ops.slice( + transpose_50, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x16x49x32xf32) <- (-1x16x49x32xf32, 1xf32) + scale_8 = paddle._C_ops.scale(slice_93, full_0, float("0"), True) + del slice_93 + + # pd_op.transpose: (-1x16x32x49xf32) <- (-1x16x49x32xf32) + transpose_51 = paddle._C_ops.transpose(slice_94, [0, 1, 3, 2]) + del slice_94 + + # pd_op.matmul: (-1x16x49x49xf32) <- (-1x16x49x32xf32, -1x16x32x49xf32) + matmul_43 = paddle._C_ops.matmul(scale_8, transpose_51, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_76 = paddle._C_ops.reshape(data_17, full_int_array_12) + del data_17 + + # pd_op.index_select: (2401x16xf32) <- (169x16xf32, 2401xi64) + index_select_8 = paddle._C_ops.index_select(data_18, reshape_76, 0) + del data_18 + + # pd_op.reshape: (49x49x16xf32) <- (2401x16xf32, 3xi64) + reshape_263 = paddle._C_ops.reshape(index_select_8, full_int_array_13) + + # pd_op.transpose: (16x49x49xf32) <- (49x49x16xf32) + transpose_52 = paddle._C_ops.transpose(reshape_263, [2, 0, 1]) + del reshape_263 + + # pd_op.unsqueeze: (1x16x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_12 = paddle._C_ops.unsqueeze(transpose_52, full_int_array_7) + + # pd_op.add: (-1x16x49x49xf32) <- (-1x16x49x49xf32, 1x16x49x49xf32) + add_188 = paddle._C_ops.add(matmul_43, unsqueeze_12) + + # pd_op.softmax: (-1x16x49x49xf32) <- (-1x16x49x49xf32) + softmax_8 = paddle._C_ops.softmax(add_188, -1) + del add_188 + + # pd_op.matmul: (-1x16x49x32xf32) <- (-1x16x49x49xf32, -1x16x49x32xf32) + matmul_132 = paddle._C_ops.matmul(softmax_8, slice_8, False, False) + + # pd_op.transpose: (-1x49x16x32xf32) <- (-1x16x49x32xf32) + transpose_53 = paddle._C_ops.transpose(matmul_132, [0, 2, 1, 3]) + del matmul_132 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_73 = [slice_92, full_29, full_44] + del slice_92 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_71 = paddle._C_ops.stack(combine_73, 0) + del combine_73 + + # pd_op.reshape: (-1x49x512xf32) <- (-1x49x16x32xf32, 3xi64) + reshape_77 = paddle._C_ops.reshape(transpose_53, stack_71) + del stack_71 + + # pd_op.matmul: (-1x49x512xf32) <- (-1x49x512xf32, 512x512xf32) + matmul_44 = paddle._C_ops.matmul(reshape_77, parameter_194, False, False) + del parameter_194 + + # pd_op.add: (-1x49x512xf32) <- (-1x49x512xf32, 512xf32) + add_58 = paddle._C_ops.add(matmul_44, parameter_193) + del parameter_193 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x49x512xf32, 4xi64) + reshape_78 = paddle._C_ops.reshape(add_58, full_int_array_38) + + # pd_op.reshape: (-1x2x2x7x7x512xf32) <- (-1x7x7x512xf32, 6xi64) + reshape_264 = paddle._C_ops.reshape(reshape_78, full_int_array_40) + + # pd_op.transpose: (-1x2x7x2x7x512xf32) <- (-1x2x2x7x7x512xf32) + transpose_54 = paddle._C_ops.transpose(reshape_264, [0, 1, 3, 2, 4, 5]) + del reshape_264 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x2x7x2x7x512xf32, 4xi64) + reshape_79 = paddle._C_ops.reshape(transpose_54, full_int_array_41) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_74 = [slice_90, full_56, full_44] + del slice_90 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_72 = paddle._C_ops.stack(combine_74, 0) + del combine_74 + + # pd_op.reshape: (-1x196x512xf32) <- (-1x14x14x512xf32, 3xi64) + reshape_80 = paddle._C_ops.reshape(reshape_79, stack_72) + del stack_72 + + # pd_op.full: (xf32) <- () + full_9 = paddle._C_ops.full( + [], + float("0.826087"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_104 = full_9 + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_53 = paddle._C_ops.shape64(reshape_80) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_95 = paddle._C_ops.slice( + shape64_53, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_53 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_75 = [slice_95, full_40, full_40] + del slice_95 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_73 = paddle._C_ops.stack(combine_75, 0) + del combine_75 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_14 = paddle._C_ops.uniform( + stack_73, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_73 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_189 = paddle._C_ops.add(full_9, uniform_14) + del uniform_14 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_14 = paddle._C_ops.floor(add_189) + del add_189 + + # pd_op.divide: (-1x196x512xf32) <- (-1x196x512xf32, xf32) + divide_14 = paddle._C_ops.divide(reshape_80, full_9) + + # pd_op.multiply: (-1x196x512xf32) <- (-1x196x512xf32, -1x1x1xf32) + multiply_14 = paddle._C_ops.multiply(divide_14, floor_14) + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_59 = paddle._C_ops.add(add_56, multiply_14) + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_60, layer_norm_61, layer_norm_62 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_59, parameter_192, parameter_191, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_191, parameter_192 + + # pd_op.matmul: (-1x196x2048xf32) <- (-1x196x512xf32, 512x2048xf32) + matmul_45 = paddle._C_ops.matmul(layer_norm_60, parameter_190, False, False) + del parameter_190 + + # pd_op.add: (-1x196x2048xf32) <- (-1x196x2048xf32, 2048xf32) + add_60 = paddle._C_ops.add(matmul_45, parameter_189) + del parameter_189 + + # pd_op.gelu: (-1x196x2048xf32) <- (-1x196x2048xf32) + gelu_8 = paddle._C_ops.gelu(add_60, False) + + # pd_op.matmul: (-1x196x512xf32) <- (-1x196x2048xf32, 2048x512xf32) + matmul_46 = paddle._C_ops.matmul(gelu_8, parameter_188, False, False) + del parameter_188 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, 512xf32) + add_61 = paddle._C_ops.add(matmul_46, parameter_187) + del parameter_187 + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_54 = paddle._C_ops.shape64(add_61) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_96 = paddle._C_ops.slice( + shape64_54, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_54 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_76 = [slice_96, full_40, full_40] + del slice_96 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_74 = paddle._C_ops.stack(combine_76, 0) + del combine_76 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_15 = paddle._C_ops.uniform( + stack_74, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_74 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_190 = paddle._C_ops.add(full_9, uniform_15) + del uniform_15 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_15 = paddle._C_ops.floor(add_190) + del add_190 + + # pd_op.divide: (-1x196x512xf32) <- (-1x196x512xf32, xf32) + divide_15 = paddle._C_ops.divide(add_61, full_9) + + # pd_op.multiply: (-1x196x512xf32) <- (-1x196x512xf32, -1x1x1xf32) + multiply_15 = paddle._C_ops.multiply(divide_15, floor_15) + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_62 = paddle._C_ops.add(add_59, multiply_15) + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_55 = paddle._C_ops.shape64(add_62) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_97 = paddle._C_ops.slice( + shape64_55, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_55 + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_63, layer_norm_64, layer_norm_65 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_62, parameter_186, parameter_185, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_185, parameter_186 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_77 = [slice_97, full_54, full_54, full_44] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_75 = paddle._C_ops.stack(combine_77, 0) + del combine_77 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x196x512xf32, 4xi64) + reshape_81 = paddle._C_ops.reshape(layer_norm_63, stack_75) + del stack_75 + + # pd_op.shape64: (4xi64) <- (-1x14x14x512xf32) + shape64_56 = paddle._C_ops.shape64(reshape_81) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_98 = paddle._C_ops.slice( + shape64_56, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_56 + + # pd_op.roll: (-1x14x14x512xf32) <- (-1x14x14x512xf32, 2xi64) + roll_8 = paddle._C_ops.roll(reshape_81, full_int_array_2, [1, 2]) + + # pd_op.shape64: (4xi64) <- (-1x14x14x512xf32) + shape64_57 = paddle._C_ops.shape64(roll_8) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_99 = paddle._C_ops.slice( + shape64_57, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_57 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_78 = [slice_99, full_55, full_28, full_55, full_28, full_44] + del slice_99 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_76 = paddle._C_ops.stack(combine_78, 0) + del combine_78 + + # pd_op.reshape: (-1x2x7x2x7x512xf32) <- (-1x14x14x512xf32, 6xi64) + reshape_265 = paddle._C_ops.reshape(roll_8, stack_76) + del stack_76 + + # pd_op.transpose: (-1x2x2x7x7x512xf32) <- (-1x2x7x2x7x512xf32) + transpose_55 = paddle._C_ops.transpose(reshape_265, [0, 1, 3, 2, 4, 5]) + del reshape_265 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x2x2x7x7x512xf32, 4xi64) + reshape_82 = paddle._C_ops.reshape(transpose_55, full_int_array_38) + + # pd_op.reshape: (-1x49x512xf32) <- (-1x7x7x512xf32, 3xi64) + reshape_83 = paddle._C_ops.reshape(reshape_82, full_int_array_39) + + # pd_op.full: (1x14x14x1xf32) <- () + full_62 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__44 = paddle._C_ops.set_value_( + full_62, + full_int_array_16, + full_int_array_17, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_62 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__45 = paddle._C_ops.set_value_( + set_value__44, + full_int_array_19, + full_int_array_20, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__44 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__46 = paddle._C_ops.set_value_( + set_value__45, + full_int_array_21, + full_int_array_22, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__45 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__47 = paddle._C_ops.set_value_( + set_value__46, + full_int_array_23, + full_int_array_24, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__46 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__48 = paddle._C_ops.set_value_( + set_value__47, + full_int_array_17, + full_int_array_2, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__47 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__49 = paddle._C_ops.set_value_( + set_value__48, + full_int_array_20, + full_int_array_25, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__48 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__50 = paddle._C_ops.set_value_( + set_value__49, + full_int_array_26, + full_int_array_27, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__49 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__51 = paddle._C_ops.set_value_( + set_value__50, + full_int_array_24, + full_int_array_28, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__50 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__4 = paddle._C_ops.set_value_( + set_value__51, + full_int_array_2, + full_int_array_29, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__51 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_266 = paddle._C_ops.reshape(set_value__4, full_int_array_42) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_150 = paddle._C_ops.transpose(reshape_266, [0, 1, 3, 2, 4, 5]) + del reshape_266 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_267 = paddle._C_ops.reshape(transpose_150, full_int_array_31) + del transpose_150 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_268 = paddle._C_ops.reshape(reshape_267, full_int_array_32) + del reshape_267 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_49 = paddle._C_ops.unsqueeze(reshape_268, full_int_array_8) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_50 = paddle._C_ops.unsqueeze(reshape_268, full_int_array_0) + del reshape_268 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_4 = paddle._C_ops.subtract(unsqueeze_49, unsqueeze_50) + del unsqueeze_49, unsqueeze_50 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_4 = paddle._C_ops.not_equal(subtract_4, full_35) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_8 = paddle._C_ops.where(not_equal_4, full_58, subtract_4) + del not_equal_4, subtract_4 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_4 = paddle._C_ops.equal(where_8, full_35) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_9 = paddle._C_ops.where(equal_4, full_59, where_8) + del equal_4, where_8 + + # pd_op.shape64: (3xi64) <- (-1x49x512xf32) + shape64_58 = paddle._C_ops.shape64(reshape_83) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_100 = paddle._C_ops.slice( + shape64_58, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_58 + + # pd_op.matmul: (-1x49x1536xf32) <- (-1x49x512xf32, 512x1536xf32) + matmul_47 = paddle._C_ops.matmul(reshape_83, parameter_184, False, False) + del parameter_184 + + # pd_op.add: (-1x49x1536xf32) <- (-1x49x1536xf32, 1536xf32) + add_63 = paddle._C_ops.add(matmul_47, parameter_183) + del parameter_183 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_79 = [slice_100, full_29, full_30, full_52, full_32] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_77 = paddle._C_ops.stack(combine_79, 0) + del combine_79 + + # pd_op.reshape: (-1x49x3x16x32xf32) <- (-1x49x1536xf32, 5xi64) + reshape_269 = paddle._C_ops.reshape(add_63, stack_77) + del stack_77 + + # pd_op.transpose: (3x-1x16x49x32xf32) <- (-1x49x3x16x32xf32) + transpose_56 = paddle._C_ops.transpose(reshape_269, [2, 0, 3, 1, 4]) + del reshape_269 + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_101 = paddle._C_ops.slice( + transpose_56, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_102 = paddle._C_ops.slice( + transpose_56, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_9 = paddle._C_ops.slice( + transpose_56, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x16x49x32xf32) <- (-1x16x49x32xf32, 1xf32) + scale_9 = paddle._C_ops.scale(slice_101, full_0, float("0"), True) + del slice_101 + + # pd_op.transpose: (-1x16x32x49xf32) <- (-1x16x49x32xf32) + transpose_57 = paddle._C_ops.transpose(slice_102, [0, 1, 3, 2]) + del slice_102 + + # pd_op.matmul: (-1x16x49x49xf32) <- (-1x16x49x32xf32, -1x16x32x49xf32) + matmul_48 = paddle._C_ops.matmul(scale_9, transpose_57, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_84 = paddle._C_ops.reshape(data_19, full_int_array_12) + del data_19 + + # pd_op.index_select: (2401x16xf32) <- (169x16xf32, 2401xi64) + index_select_9 = paddle._C_ops.index_select(data_20, reshape_84, 0) + del data_20 + + # pd_op.reshape: (49x49x16xf32) <- (2401x16xf32, 3xi64) + reshape_270 = paddle._C_ops.reshape(index_select_9, full_int_array_13) + + # pd_op.transpose: (16x49x49xf32) <- (49x49x16xf32) + transpose_58 = paddle._C_ops.transpose(reshape_270, [2, 0, 1]) + del reshape_270 + + # pd_op.unsqueeze: (1x16x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_13 = paddle._C_ops.unsqueeze(transpose_58, full_int_array_7) + + # pd_op.add: (-1x16x49x49xf32) <- (-1x16x49x49xf32, 1x16x49x49xf32) + add_64 = paddle._C_ops.add(matmul_48, unsqueeze_13) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_4 = paddle._C_ops.floor_divide(slice_100, full_60) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_80 = [floor_divide_4, full_31, full_52, full_29, full_29] + del floor_divide_4 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_78 = paddle._C_ops.stack(combine_80, 0) + del combine_80 + + # pd_op.reshape: (-1x4x16x49x49xf32) <- (-1x16x49x49xf32, 5xi64) + reshape_85 = paddle._C_ops.reshape(add_64, stack_78) + del stack_78 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_51 = paddle._C_ops.unsqueeze(where_9, full_int_array_8) + del where_9 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_14 = paddle._C_ops.unsqueeze(unsqueeze_51, full_int_array_7) + del unsqueeze_51 + + # pd_op.add: (-1x4x16x49x49xf32) <- (-1x4x16x49x49xf32, 1x4x1x49x49xf32) + add_65 = paddle._C_ops.add(reshape_85, unsqueeze_14) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_81 = [slice_100, full_52, full_29, full_29] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_79 = paddle._C_ops.stack(combine_81, 0) + del combine_81 + + # pd_op.reshape: (-1x16x49x49xf32) <- (-1x4x16x49x49xf32, 4xi64) + reshape_271 = paddle._C_ops.reshape(add_65, stack_79) + del stack_79 + + # pd_op.softmax: (-1x16x49x49xf32) <- (-1x16x49x49xf32) + softmax_9 = paddle._C_ops.softmax(reshape_271, -1) + del reshape_271 + + # pd_op.matmul: (-1x16x49x32xf32) <- (-1x16x49x49xf32, -1x16x49x32xf32) + matmul_133 = paddle._C_ops.matmul(softmax_9, slice_9, False, False) + + # pd_op.transpose: (-1x49x16x32xf32) <- (-1x16x49x32xf32) + transpose_59 = paddle._C_ops.transpose(matmul_133, [0, 2, 1, 3]) + del matmul_133 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_82 = [slice_100, full_29, full_44] + del slice_100 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_80 = paddle._C_ops.stack(combine_82, 0) + del combine_82 + + # pd_op.reshape: (-1x49x512xf32) <- (-1x49x16x32xf32, 3xi64) + reshape_86 = paddle._C_ops.reshape(transpose_59, stack_80) + del stack_80 + + # pd_op.matmul: (-1x49x512xf32) <- (-1x49x512xf32, 512x512xf32) + matmul_49 = paddle._C_ops.matmul(reshape_86, parameter_182, False, False) + del parameter_182 + + # pd_op.add: (-1x49x512xf32) <- (-1x49x512xf32, 512xf32) + add_66 = paddle._C_ops.add(matmul_49, parameter_181) + del parameter_181 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x49x512xf32, 4xi64) + reshape_87 = paddle._C_ops.reshape(add_66, full_int_array_38) + + # pd_op.reshape: (-1x2x2x7x7x512xf32) <- (-1x7x7x512xf32, 6xi64) + reshape_272 = paddle._C_ops.reshape(reshape_87, full_int_array_40) + + # pd_op.transpose: (-1x2x7x2x7x512xf32) <- (-1x2x2x7x7x512xf32) + transpose_60 = paddle._C_ops.transpose(reshape_272, [0, 1, 3, 2, 4, 5]) + del reshape_272 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x2x7x2x7x512xf32, 4xi64) + reshape_88 = paddle._C_ops.reshape(transpose_60, full_int_array_41) + + # pd_op.roll: (-1x14x14x512xf32) <- (-1x14x14x512xf32, 2xi64) + roll_9 = paddle._C_ops.roll(reshape_88, full_int_array_3, [1, 2]) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_83 = [slice_97, full_56, full_44] + del slice_97 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_81 = paddle._C_ops.stack(combine_83, 0) + del combine_83 + + # pd_op.reshape: (-1x196x512xf32) <- (-1x14x14x512xf32, 3xi64) + reshape_89 = paddle._C_ops.reshape(roll_9, stack_81) + del stack_81 + + # pd_op.full: (xf32) <- () + full_10 = paddle._C_ops.full( + [], + float("0.804348"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_115 = full_10 + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_59 = paddle._C_ops.shape64(reshape_89) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_103 = paddle._C_ops.slice( + shape64_59, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_59 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_84 = [slice_103, full_40, full_40] + del slice_103 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_82 = paddle._C_ops.stack(combine_84, 0) + del combine_84 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_16 = paddle._C_ops.uniform( + stack_82, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_82 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_191 = paddle._C_ops.add(full_10, uniform_16) + del uniform_16 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_16 = paddle._C_ops.floor(add_191) + del add_191 + + # pd_op.divide: (-1x196x512xf32) <- (-1x196x512xf32, xf32) + divide_16 = paddle._C_ops.divide(reshape_89, full_10) + + # pd_op.multiply: (-1x196x512xf32) <- (-1x196x512xf32, -1x1x1xf32) + multiply_16 = paddle._C_ops.multiply(divide_16, floor_16) + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_67 = paddle._C_ops.add(add_62, multiply_16) + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_66, layer_norm_67, layer_norm_68 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_67, parameter_180, parameter_179, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_179, parameter_180 + + # pd_op.matmul: (-1x196x2048xf32) <- (-1x196x512xf32, 512x2048xf32) + matmul_50 = paddle._C_ops.matmul(layer_norm_66, parameter_178, False, False) + del parameter_178 + + # pd_op.add: (-1x196x2048xf32) <- (-1x196x2048xf32, 2048xf32) + add_68 = paddle._C_ops.add(matmul_50, parameter_177) + del parameter_177 + + # pd_op.gelu: (-1x196x2048xf32) <- (-1x196x2048xf32) + gelu_9 = paddle._C_ops.gelu(add_68, False) + + # pd_op.matmul: (-1x196x512xf32) <- (-1x196x2048xf32, 2048x512xf32) + matmul_51 = paddle._C_ops.matmul(gelu_9, parameter_176, False, False) + del parameter_176 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, 512xf32) + add_69 = paddle._C_ops.add(matmul_51, parameter_175) + del parameter_175 + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_60 = paddle._C_ops.shape64(add_69) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_104 = paddle._C_ops.slice( + shape64_60, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_60 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_85 = [slice_104, full_40, full_40] + del slice_104 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_83 = paddle._C_ops.stack(combine_85, 0) + del combine_85 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_17 = paddle._C_ops.uniform( + stack_83, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_83 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_192 = paddle._C_ops.add(full_10, uniform_17) + del uniform_17 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_17 = paddle._C_ops.floor(add_192) + del add_192 + + # pd_op.divide: (-1x196x512xf32) <- (-1x196x512xf32, xf32) + divide_17 = paddle._C_ops.divide(add_69, full_10) + + # pd_op.multiply: (-1x196x512xf32) <- (-1x196x512xf32, -1x1x1xf32) + multiply_17 = paddle._C_ops.multiply(divide_17, floor_17) + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_70 = paddle._C_ops.add(add_67, multiply_17) + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_61 = paddle._C_ops.shape64(add_70) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_105 = paddle._C_ops.slice( + shape64_61, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_61 + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_69, layer_norm_70, layer_norm_71 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_70, parameter_174, parameter_173, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_173, parameter_174 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_86 = [slice_105, full_54, full_54, full_44] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_84 = paddle._C_ops.stack(combine_86, 0) + del combine_86 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x196x512xf32, 4xi64) + reshape_90 = paddle._C_ops.reshape(layer_norm_69, stack_84) + del stack_84 + + # pd_op.shape64: (4xi64) <- (-1x14x14x512xf32) + shape64_62 = paddle._C_ops.shape64(reshape_90) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_106 = paddle._C_ops.slice( + shape64_62, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_62 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_87 = [slice_106, full_55, full_28, full_55, full_28, full_44] + del slice_106 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_85 = paddle._C_ops.stack(combine_87, 0) + del combine_87 + + # pd_op.reshape: (-1x2x7x2x7x512xf32) <- (-1x14x14x512xf32, 6xi64) + reshape_273 = paddle._C_ops.reshape(reshape_90, stack_85) + del stack_85 + + # pd_op.transpose: (-1x2x2x7x7x512xf32) <- (-1x2x7x2x7x512xf32) + transpose_61 = paddle._C_ops.transpose(reshape_273, [0, 1, 3, 2, 4, 5]) + del reshape_273 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x2x2x7x7x512xf32, 4xi64) + reshape_91 = paddle._C_ops.reshape(transpose_61, full_int_array_38) + + # pd_op.reshape: (-1x49x512xf32) <- (-1x7x7x512xf32, 3xi64) + reshape_92 = paddle._C_ops.reshape(reshape_91, full_int_array_39) + + # pd_op.shape64: (3xi64) <- (-1x49x512xf32) + shape64_63 = paddle._C_ops.shape64(reshape_92) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_107 = paddle._C_ops.slice( + shape64_63, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_63 + + # pd_op.matmul: (-1x49x1536xf32) <- (-1x49x512xf32, 512x1536xf32) + matmul_52 = paddle._C_ops.matmul(reshape_92, parameter_172, False, False) + del parameter_172 + + # pd_op.add: (-1x49x1536xf32) <- (-1x49x1536xf32, 1536xf32) + add_71 = paddle._C_ops.add(matmul_52, parameter_171) + del parameter_171 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_88 = [slice_107, full_29, full_30, full_52, full_32] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_86 = paddle._C_ops.stack(combine_88, 0) + del combine_88 + + # pd_op.reshape: (-1x49x3x16x32xf32) <- (-1x49x1536xf32, 5xi64) + reshape_274 = paddle._C_ops.reshape(add_71, stack_86) + del stack_86 + + # pd_op.transpose: (3x-1x16x49x32xf32) <- (-1x49x3x16x32xf32) + transpose_62 = paddle._C_ops.transpose(reshape_274, [2, 0, 3, 1, 4]) + del reshape_274 + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_108 = paddle._C_ops.slice( + transpose_62, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_109 = paddle._C_ops.slice( + transpose_62, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_10 = paddle._C_ops.slice( + transpose_62, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x16x49x32xf32) <- (-1x16x49x32xf32, 1xf32) + scale_10 = paddle._C_ops.scale(slice_108, full_0, float("0"), True) + del slice_108 + + # pd_op.transpose: (-1x16x32x49xf32) <- (-1x16x49x32xf32) + transpose_63 = paddle._C_ops.transpose(slice_109, [0, 1, 3, 2]) + del slice_109 + + # pd_op.matmul: (-1x16x49x49xf32) <- (-1x16x49x32xf32, -1x16x32x49xf32) + matmul_53 = paddle._C_ops.matmul(scale_10, transpose_63, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_93 = paddle._C_ops.reshape(data_21, full_int_array_12) + del data_21 + + # pd_op.index_select: (2401x16xf32) <- (169x16xf32, 2401xi64) + index_select_10 = paddle._C_ops.index_select(data_22, reshape_93, 0) + del data_22 + + # pd_op.reshape: (49x49x16xf32) <- (2401x16xf32, 3xi64) + reshape_275 = paddle._C_ops.reshape(index_select_10, full_int_array_13) + + # pd_op.transpose: (16x49x49xf32) <- (49x49x16xf32) + transpose_64 = paddle._C_ops.transpose(reshape_275, [2, 0, 1]) + del reshape_275 + + # pd_op.unsqueeze: (1x16x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_15 = paddle._C_ops.unsqueeze(transpose_64, full_int_array_7) + + # pd_op.add: (-1x16x49x49xf32) <- (-1x16x49x49xf32, 1x16x49x49xf32) + add_193 = paddle._C_ops.add(matmul_53, unsqueeze_15) + + # pd_op.softmax: (-1x16x49x49xf32) <- (-1x16x49x49xf32) + softmax_10 = paddle._C_ops.softmax(add_193, -1) + del add_193 + + # pd_op.matmul: (-1x16x49x32xf32) <- (-1x16x49x49xf32, -1x16x49x32xf32) + matmul_134 = paddle._C_ops.matmul(softmax_10, slice_10, False, False) + + # pd_op.transpose: (-1x49x16x32xf32) <- (-1x16x49x32xf32) + transpose_65 = paddle._C_ops.transpose(matmul_134, [0, 2, 1, 3]) + del matmul_134 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_89 = [slice_107, full_29, full_44] + del slice_107 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_87 = paddle._C_ops.stack(combine_89, 0) + del combine_89 + + # pd_op.reshape: (-1x49x512xf32) <- (-1x49x16x32xf32, 3xi64) + reshape_94 = paddle._C_ops.reshape(transpose_65, stack_87) + del stack_87 + + # pd_op.matmul: (-1x49x512xf32) <- (-1x49x512xf32, 512x512xf32) + matmul_54 = paddle._C_ops.matmul(reshape_94, parameter_170, False, False) + del parameter_170 + + # pd_op.add: (-1x49x512xf32) <- (-1x49x512xf32, 512xf32) + add_72 = paddle._C_ops.add(matmul_54, parameter_169) + del parameter_169 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x49x512xf32, 4xi64) + reshape_95 = paddle._C_ops.reshape(add_72, full_int_array_38) + + # pd_op.reshape: (-1x2x2x7x7x512xf32) <- (-1x7x7x512xf32, 6xi64) + reshape_276 = paddle._C_ops.reshape(reshape_95, full_int_array_40) + + # pd_op.transpose: (-1x2x7x2x7x512xf32) <- (-1x2x2x7x7x512xf32) + transpose_66 = paddle._C_ops.transpose(reshape_276, [0, 1, 3, 2, 4, 5]) + del reshape_276 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x2x7x2x7x512xf32, 4xi64) + reshape_96 = paddle._C_ops.reshape(transpose_66, full_int_array_41) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_90 = [slice_105, full_56, full_44] + del slice_105 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_88 = paddle._C_ops.stack(combine_90, 0) + del combine_90 + + # pd_op.reshape: (-1x196x512xf32) <- (-1x14x14x512xf32, 3xi64) + reshape_97 = paddle._C_ops.reshape(reshape_96, stack_88) + del stack_88 + + # pd_op.full: (xf32) <- () + full_11 = paddle._C_ops.full( + [], + float("0.782609"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_124 = full_11 + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_64 = paddle._C_ops.shape64(reshape_97) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_110 = paddle._C_ops.slice( + shape64_64, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_64 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_91 = [slice_110, full_40, full_40] + del slice_110 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_89 = paddle._C_ops.stack(combine_91, 0) + del combine_91 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_18 = paddle._C_ops.uniform( + stack_89, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_89 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_194 = paddle._C_ops.add(full_11, uniform_18) + del uniform_18 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_18 = paddle._C_ops.floor(add_194) + del add_194 + + # pd_op.divide: (-1x196x512xf32) <- (-1x196x512xf32, xf32) + divide_18 = paddle._C_ops.divide(reshape_97, full_11) + + # pd_op.multiply: (-1x196x512xf32) <- (-1x196x512xf32, -1x1x1xf32) + multiply_18 = paddle._C_ops.multiply(divide_18, floor_18) + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_73 = paddle._C_ops.add(add_70, multiply_18) + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_72, layer_norm_73, layer_norm_74 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_73, parameter_168, parameter_167, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_167, parameter_168 + + # pd_op.matmul: (-1x196x2048xf32) <- (-1x196x512xf32, 512x2048xf32) + matmul_55 = paddle._C_ops.matmul(layer_norm_72, parameter_166, False, False) + del parameter_166 + + # pd_op.add: (-1x196x2048xf32) <- (-1x196x2048xf32, 2048xf32) + add_74 = paddle._C_ops.add(matmul_55, parameter_165) + del parameter_165 + + # pd_op.gelu: (-1x196x2048xf32) <- (-1x196x2048xf32) + gelu_10 = paddle._C_ops.gelu(add_74, False) + + # pd_op.matmul: (-1x196x512xf32) <- (-1x196x2048xf32, 2048x512xf32) + matmul_56 = paddle._C_ops.matmul(gelu_10, parameter_164, False, False) + del parameter_164 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, 512xf32) + add_75 = paddle._C_ops.add(matmul_56, parameter_163) + del parameter_163 + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_65 = paddle._C_ops.shape64(add_75) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_111 = paddle._C_ops.slice( + shape64_65, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_65 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_92 = [slice_111, full_40, full_40] + del slice_111 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_90 = paddle._C_ops.stack(combine_92, 0) + del combine_92 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_19 = paddle._C_ops.uniform( + stack_90, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_90 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_195 = paddle._C_ops.add(full_11, uniform_19) + del uniform_19 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_19 = paddle._C_ops.floor(add_195) + del add_195 + + # pd_op.divide: (-1x196x512xf32) <- (-1x196x512xf32, xf32) + divide_19 = paddle._C_ops.divide(add_75, full_11) + + # pd_op.multiply: (-1x196x512xf32) <- (-1x196x512xf32, -1x1x1xf32) + multiply_19 = paddle._C_ops.multiply(divide_19, floor_19) + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_76 = paddle._C_ops.add(add_73, multiply_19) + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_66 = paddle._C_ops.shape64(add_76) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_112 = paddle._C_ops.slice( + shape64_66, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_66 + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_75, layer_norm_76, layer_norm_77 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_76, parameter_162, parameter_161, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_161, parameter_162 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_93 = [slice_112, full_54, full_54, full_44] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_91 = paddle._C_ops.stack(combine_93, 0) + del combine_93 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x196x512xf32, 4xi64) + reshape_98 = paddle._C_ops.reshape(layer_norm_75, stack_91) + del stack_91 + + # pd_op.shape64: (4xi64) <- (-1x14x14x512xf32) + shape64_67 = paddle._C_ops.shape64(reshape_98) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_113 = paddle._C_ops.slice( + shape64_67, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_67 + + # pd_op.roll: (-1x14x14x512xf32) <- (-1x14x14x512xf32, 2xi64) + roll_10 = paddle._C_ops.roll(reshape_98, full_int_array_2, [1, 2]) + + # pd_op.shape64: (4xi64) <- (-1x14x14x512xf32) + shape64_68 = paddle._C_ops.shape64(roll_10) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_114 = paddle._C_ops.slice( + shape64_68, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_68 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_94 = [slice_114, full_55, full_28, full_55, full_28, full_44] + del slice_114 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_92 = paddle._C_ops.stack(combine_94, 0) + del combine_94 + + # pd_op.reshape: (-1x2x7x2x7x512xf32) <- (-1x14x14x512xf32, 6xi64) + reshape_277 = paddle._C_ops.reshape(roll_10, stack_92) + del stack_92 + + # pd_op.transpose: (-1x2x2x7x7x512xf32) <- (-1x2x7x2x7x512xf32) + transpose_67 = paddle._C_ops.transpose(reshape_277, [0, 1, 3, 2, 4, 5]) + del reshape_277 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x2x2x7x7x512xf32, 4xi64) + reshape_99 = paddle._C_ops.reshape(transpose_67, full_int_array_38) + + # pd_op.reshape: (-1x49x512xf32) <- (-1x7x7x512xf32, 3xi64) + reshape_100 = paddle._C_ops.reshape(reshape_99, full_int_array_39) + + # pd_op.full: (1x14x14x1xf32) <- () + full_63 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__52 = paddle._C_ops.set_value_( + full_63, + full_int_array_16, + full_int_array_17, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_63 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__53 = paddle._C_ops.set_value_( + set_value__52, + full_int_array_19, + full_int_array_20, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__52 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__54 = paddle._C_ops.set_value_( + set_value__53, + full_int_array_21, + full_int_array_22, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__53 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__55 = paddle._C_ops.set_value_( + set_value__54, + full_int_array_23, + full_int_array_24, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__54 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__56 = paddle._C_ops.set_value_( + set_value__55, + full_int_array_17, + full_int_array_2, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__55 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__57 = paddle._C_ops.set_value_( + set_value__56, + full_int_array_20, + full_int_array_25, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__56 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__58 = paddle._C_ops.set_value_( + set_value__57, + full_int_array_26, + full_int_array_27, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__57 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__59 = paddle._C_ops.set_value_( + set_value__58, + full_int_array_24, + full_int_array_28, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__58 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__5 = paddle._C_ops.set_value_( + set_value__59, + full_int_array_2, + full_int_array_29, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__59 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_278 = paddle._C_ops.reshape(set_value__5, full_int_array_42) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_151 = paddle._C_ops.transpose(reshape_278, [0, 1, 3, 2, 4, 5]) + del reshape_278 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_279 = paddle._C_ops.reshape(transpose_151, full_int_array_31) + del transpose_151 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_280 = paddle._C_ops.reshape(reshape_279, full_int_array_32) + del reshape_279 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_52 = paddle._C_ops.unsqueeze(reshape_280, full_int_array_8) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_53 = paddle._C_ops.unsqueeze(reshape_280, full_int_array_0) + del reshape_280 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_5 = paddle._C_ops.subtract(unsqueeze_52, unsqueeze_53) + del unsqueeze_52, unsqueeze_53 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_5 = paddle._C_ops.not_equal(subtract_5, full_35) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_10 = paddle._C_ops.where(not_equal_5, full_58, subtract_5) + del not_equal_5, subtract_5 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_5 = paddle._C_ops.equal(where_10, full_35) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_11 = paddle._C_ops.where(equal_5, full_59, where_10) + del equal_5, where_10 + + # pd_op.shape64: (3xi64) <- (-1x49x512xf32) + shape64_69 = paddle._C_ops.shape64(reshape_100) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_115 = paddle._C_ops.slice( + shape64_69, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_69 + + # pd_op.matmul: (-1x49x1536xf32) <- (-1x49x512xf32, 512x1536xf32) + matmul_57 = paddle._C_ops.matmul(reshape_100, parameter_160, False, False) + del parameter_160 + + # pd_op.add: (-1x49x1536xf32) <- (-1x49x1536xf32, 1536xf32) + add_77 = paddle._C_ops.add(matmul_57, parameter_159) + del parameter_159 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_95 = [slice_115, full_29, full_30, full_52, full_32] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_93 = paddle._C_ops.stack(combine_95, 0) + del combine_95 + + # pd_op.reshape: (-1x49x3x16x32xf32) <- (-1x49x1536xf32, 5xi64) + reshape_281 = paddle._C_ops.reshape(add_77, stack_93) + del stack_93 + + # pd_op.transpose: (3x-1x16x49x32xf32) <- (-1x49x3x16x32xf32) + transpose_68 = paddle._C_ops.transpose(reshape_281, [2, 0, 3, 1, 4]) + del reshape_281 + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_116 = paddle._C_ops.slice( + transpose_68, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_117 = paddle._C_ops.slice( + transpose_68, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_11 = paddle._C_ops.slice( + transpose_68, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x16x49x32xf32) <- (-1x16x49x32xf32, 1xf32) + scale_11 = paddle._C_ops.scale(slice_116, full_0, float("0"), True) + del slice_116 + + # pd_op.transpose: (-1x16x32x49xf32) <- (-1x16x49x32xf32) + transpose_69 = paddle._C_ops.transpose(slice_117, [0, 1, 3, 2]) + del slice_117 + + # pd_op.matmul: (-1x16x49x49xf32) <- (-1x16x49x32xf32, -1x16x32x49xf32) + matmul_58 = paddle._C_ops.matmul(scale_11, transpose_69, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_101 = paddle._C_ops.reshape(data_23, full_int_array_12) + del data_23 + + # pd_op.index_select: (2401x16xf32) <- (169x16xf32, 2401xi64) + index_select_11 = paddle._C_ops.index_select(data_24, reshape_101, 0) + del data_24 + + # pd_op.reshape: (49x49x16xf32) <- (2401x16xf32, 3xi64) + reshape_282 = paddle._C_ops.reshape(index_select_11, full_int_array_13) + + # pd_op.transpose: (16x49x49xf32) <- (49x49x16xf32) + transpose_70 = paddle._C_ops.transpose(reshape_282, [2, 0, 1]) + del reshape_282 + + # pd_op.unsqueeze: (1x16x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_16 = paddle._C_ops.unsqueeze(transpose_70, full_int_array_7) + + # pd_op.add: (-1x16x49x49xf32) <- (-1x16x49x49xf32, 1x16x49x49xf32) + add_78 = paddle._C_ops.add(matmul_58, unsqueeze_16) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_5 = paddle._C_ops.floor_divide(slice_115, full_60) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_96 = [floor_divide_5, full_31, full_52, full_29, full_29] + del floor_divide_5 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_94 = paddle._C_ops.stack(combine_96, 0) + del combine_96 + + # pd_op.reshape: (-1x4x16x49x49xf32) <- (-1x16x49x49xf32, 5xi64) + reshape_102 = paddle._C_ops.reshape(add_78, stack_94) + del stack_94 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_54 = paddle._C_ops.unsqueeze(where_11, full_int_array_8) + del where_11 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_17 = paddle._C_ops.unsqueeze(unsqueeze_54, full_int_array_7) + del unsqueeze_54 + + # pd_op.add: (-1x4x16x49x49xf32) <- (-1x4x16x49x49xf32, 1x4x1x49x49xf32) + add_79 = paddle._C_ops.add(reshape_102, unsqueeze_17) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_97 = [slice_115, full_52, full_29, full_29] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_95 = paddle._C_ops.stack(combine_97, 0) + del combine_97 + + # pd_op.reshape: (-1x16x49x49xf32) <- (-1x4x16x49x49xf32, 4xi64) + reshape_283 = paddle._C_ops.reshape(add_79, stack_95) + del stack_95 + + # pd_op.softmax: (-1x16x49x49xf32) <- (-1x16x49x49xf32) + softmax_11 = paddle._C_ops.softmax(reshape_283, -1) + del reshape_283 + + # pd_op.matmul: (-1x16x49x32xf32) <- (-1x16x49x49xf32, -1x16x49x32xf32) + matmul_135 = paddle._C_ops.matmul(softmax_11, slice_11, False, False) + + # pd_op.transpose: (-1x49x16x32xf32) <- (-1x16x49x32xf32) + transpose_71 = paddle._C_ops.transpose(matmul_135, [0, 2, 1, 3]) + del matmul_135 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_98 = [slice_115, full_29, full_44] + del slice_115 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_96 = paddle._C_ops.stack(combine_98, 0) + del combine_98 + + # pd_op.reshape: (-1x49x512xf32) <- (-1x49x16x32xf32, 3xi64) + reshape_103 = paddle._C_ops.reshape(transpose_71, stack_96) + del stack_96 + + # pd_op.matmul: (-1x49x512xf32) <- (-1x49x512xf32, 512x512xf32) + matmul_59 = paddle._C_ops.matmul(reshape_103, parameter_158, False, False) + del parameter_158 + + # pd_op.add: (-1x49x512xf32) <- (-1x49x512xf32, 512xf32) + add_80 = paddle._C_ops.add(matmul_59, parameter_157) + del parameter_157 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x49x512xf32, 4xi64) + reshape_104 = paddle._C_ops.reshape(add_80, full_int_array_38) + + # pd_op.reshape: (-1x2x2x7x7x512xf32) <- (-1x7x7x512xf32, 6xi64) + reshape_284 = paddle._C_ops.reshape(reshape_104, full_int_array_40) + + # pd_op.transpose: (-1x2x7x2x7x512xf32) <- (-1x2x2x7x7x512xf32) + transpose_72 = paddle._C_ops.transpose(reshape_284, [0, 1, 3, 2, 4, 5]) + del reshape_284 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x2x7x2x7x512xf32, 4xi64) + reshape_105 = paddle._C_ops.reshape(transpose_72, full_int_array_41) + + # pd_op.roll: (-1x14x14x512xf32) <- (-1x14x14x512xf32, 2xi64) + roll_11 = paddle._C_ops.roll(reshape_105, full_int_array_3, [1, 2]) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_99 = [slice_112, full_56, full_44] + del slice_112 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_97 = paddle._C_ops.stack(combine_99, 0) + del combine_99 + + # pd_op.reshape: (-1x196x512xf32) <- (-1x14x14x512xf32, 3xi64) + reshape_106 = paddle._C_ops.reshape(roll_11, stack_97) + del stack_97 + + # pd_op.full: (xf32) <- () + full_12 = paddle._C_ops.full( + [], + float("0.76087"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_135 = full_12 + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_70 = paddle._C_ops.shape64(reshape_106) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_118 = paddle._C_ops.slice( + shape64_70, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_70 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_100 = [slice_118, full_40, full_40] + del slice_118 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_98 = paddle._C_ops.stack(combine_100, 0) + del combine_100 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_20 = paddle._C_ops.uniform( + stack_98, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_98 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_196 = paddle._C_ops.add(full_12, uniform_20) + del uniform_20 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_20 = paddle._C_ops.floor(add_196) + del add_196 + + # pd_op.divide: (-1x196x512xf32) <- (-1x196x512xf32, xf32) + divide_20 = paddle._C_ops.divide(reshape_106, full_12) + + # pd_op.multiply: (-1x196x512xf32) <- (-1x196x512xf32, -1x1x1xf32) + multiply_20 = paddle._C_ops.multiply(divide_20, floor_20) + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_81 = paddle._C_ops.add(add_76, multiply_20) + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_78, layer_norm_79, layer_norm_80 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_81, parameter_156, parameter_155, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_155, parameter_156 + + # pd_op.matmul: (-1x196x2048xf32) <- (-1x196x512xf32, 512x2048xf32) + matmul_60 = paddle._C_ops.matmul(layer_norm_78, parameter_154, False, False) + del parameter_154 + + # pd_op.add: (-1x196x2048xf32) <- (-1x196x2048xf32, 2048xf32) + add_82 = paddle._C_ops.add(matmul_60, parameter_153) + del parameter_153 + + # pd_op.gelu: (-1x196x2048xf32) <- (-1x196x2048xf32) + gelu_11 = paddle._C_ops.gelu(add_82, False) + + # pd_op.matmul: (-1x196x512xf32) <- (-1x196x2048xf32, 2048x512xf32) + matmul_61 = paddle._C_ops.matmul(gelu_11, parameter_152, False, False) + del parameter_152 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, 512xf32) + add_83 = paddle._C_ops.add(matmul_61, parameter_151) + del parameter_151 + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_71 = paddle._C_ops.shape64(add_83) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_119 = paddle._C_ops.slice( + shape64_71, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_71 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_101 = [slice_119, full_40, full_40] + del slice_119 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_99 = paddle._C_ops.stack(combine_101, 0) + del combine_101 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_21 = paddle._C_ops.uniform( + stack_99, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_99 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_197 = paddle._C_ops.add(full_12, uniform_21) + del uniform_21 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_21 = paddle._C_ops.floor(add_197) + del add_197 + + # pd_op.divide: (-1x196x512xf32) <- (-1x196x512xf32, xf32) + divide_21 = paddle._C_ops.divide(add_83, full_12) + + # pd_op.multiply: (-1x196x512xf32) <- (-1x196x512xf32, -1x1x1xf32) + multiply_21 = paddle._C_ops.multiply(divide_21, floor_21) + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_84 = paddle._C_ops.add(add_81, multiply_21) + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_72 = paddle._C_ops.shape64(add_84) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_120 = paddle._C_ops.slice( + shape64_72, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_72 + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_81, layer_norm_82, layer_norm_83 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_84, parameter_150, parameter_149, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_149, parameter_150 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_102 = [slice_120, full_54, full_54, full_44] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_100 = paddle._C_ops.stack(combine_102, 0) + del combine_102 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x196x512xf32, 4xi64) + reshape_107 = paddle._C_ops.reshape(layer_norm_81, stack_100) + del stack_100 + + # pd_op.shape64: (4xi64) <- (-1x14x14x512xf32) + shape64_73 = paddle._C_ops.shape64(reshape_107) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_121 = paddle._C_ops.slice( + shape64_73, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_73 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_103 = [slice_121, full_55, full_28, full_55, full_28, full_44] + del slice_121 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_101 = paddle._C_ops.stack(combine_103, 0) + del combine_103 + + # pd_op.reshape: (-1x2x7x2x7x512xf32) <- (-1x14x14x512xf32, 6xi64) + reshape_285 = paddle._C_ops.reshape(reshape_107, stack_101) + del stack_101 + + # pd_op.transpose: (-1x2x2x7x7x512xf32) <- (-1x2x7x2x7x512xf32) + transpose_73 = paddle._C_ops.transpose(reshape_285, [0, 1, 3, 2, 4, 5]) + del reshape_285 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x2x2x7x7x512xf32, 4xi64) + reshape_108 = paddle._C_ops.reshape(transpose_73, full_int_array_38) + + # pd_op.reshape: (-1x49x512xf32) <- (-1x7x7x512xf32, 3xi64) + reshape_109 = paddle._C_ops.reshape(reshape_108, full_int_array_39) + + # pd_op.shape64: (3xi64) <- (-1x49x512xf32) + shape64_74 = paddle._C_ops.shape64(reshape_109) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_122 = paddle._C_ops.slice( + shape64_74, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_74 + + # pd_op.matmul: (-1x49x1536xf32) <- (-1x49x512xf32, 512x1536xf32) + matmul_62 = paddle._C_ops.matmul(reshape_109, parameter_148, False, False) + del parameter_148 + + # pd_op.add: (-1x49x1536xf32) <- (-1x49x1536xf32, 1536xf32) + add_85 = paddle._C_ops.add(matmul_62, parameter_147) + del parameter_147 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_104 = [slice_122, full_29, full_30, full_52, full_32] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_102 = paddle._C_ops.stack(combine_104, 0) + del combine_104 + + # pd_op.reshape: (-1x49x3x16x32xf32) <- (-1x49x1536xf32, 5xi64) + reshape_286 = paddle._C_ops.reshape(add_85, stack_102) + del stack_102 + + # pd_op.transpose: (3x-1x16x49x32xf32) <- (-1x49x3x16x32xf32) + transpose_74 = paddle._C_ops.transpose(reshape_286, [2, 0, 3, 1, 4]) + del reshape_286 + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_123 = paddle._C_ops.slice( + transpose_74, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_124 = paddle._C_ops.slice( + transpose_74, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_12 = paddle._C_ops.slice( + transpose_74, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x16x49x32xf32) <- (-1x16x49x32xf32, 1xf32) + scale_12 = paddle._C_ops.scale(slice_123, full_0, float("0"), True) + del slice_123 + + # pd_op.transpose: (-1x16x32x49xf32) <- (-1x16x49x32xf32) + transpose_75 = paddle._C_ops.transpose(slice_124, [0, 1, 3, 2]) + del slice_124 + + # pd_op.matmul: (-1x16x49x49xf32) <- (-1x16x49x32xf32, -1x16x32x49xf32) + matmul_63 = paddle._C_ops.matmul(scale_12, transpose_75, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_110 = paddle._C_ops.reshape(data_25, full_int_array_12) + del data_25 + + # pd_op.index_select: (2401x16xf32) <- (169x16xf32, 2401xi64) + index_select_12 = paddle._C_ops.index_select(data_26, reshape_110, 0) + del data_26 + + # pd_op.reshape: (49x49x16xf32) <- (2401x16xf32, 3xi64) + reshape_287 = paddle._C_ops.reshape(index_select_12, full_int_array_13) + + # pd_op.transpose: (16x49x49xf32) <- (49x49x16xf32) + transpose_76 = paddle._C_ops.transpose(reshape_287, [2, 0, 1]) + del reshape_287 + + # pd_op.unsqueeze: (1x16x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_18 = paddle._C_ops.unsqueeze(transpose_76, full_int_array_7) + + # pd_op.add: (-1x16x49x49xf32) <- (-1x16x49x49xf32, 1x16x49x49xf32) + add_198 = paddle._C_ops.add(matmul_63, unsqueeze_18) + + # pd_op.softmax: (-1x16x49x49xf32) <- (-1x16x49x49xf32) + softmax_12 = paddle._C_ops.softmax(add_198, -1) + del add_198 + + # pd_op.matmul: (-1x16x49x32xf32) <- (-1x16x49x49xf32, -1x16x49x32xf32) + matmul_136 = paddle._C_ops.matmul(softmax_12, slice_12, False, False) + + # pd_op.transpose: (-1x49x16x32xf32) <- (-1x16x49x32xf32) + transpose_77 = paddle._C_ops.transpose(matmul_136, [0, 2, 1, 3]) + del matmul_136 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_105 = [slice_122, full_29, full_44] + del slice_122 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_103 = paddle._C_ops.stack(combine_105, 0) + del combine_105 + + # pd_op.reshape: (-1x49x512xf32) <- (-1x49x16x32xf32, 3xi64) + reshape_111 = paddle._C_ops.reshape(transpose_77, stack_103) + del stack_103 + + # pd_op.matmul: (-1x49x512xf32) <- (-1x49x512xf32, 512x512xf32) + matmul_64 = paddle._C_ops.matmul(reshape_111, parameter_146, False, False) + del parameter_146 + + # pd_op.add: (-1x49x512xf32) <- (-1x49x512xf32, 512xf32) + add_86 = paddle._C_ops.add(matmul_64, parameter_145) + del parameter_145 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x49x512xf32, 4xi64) + reshape_112 = paddle._C_ops.reshape(add_86, full_int_array_38) + + # pd_op.reshape: (-1x2x2x7x7x512xf32) <- (-1x7x7x512xf32, 6xi64) + reshape_288 = paddle._C_ops.reshape(reshape_112, full_int_array_40) + + # pd_op.transpose: (-1x2x7x2x7x512xf32) <- (-1x2x2x7x7x512xf32) + transpose_78 = paddle._C_ops.transpose(reshape_288, [0, 1, 3, 2, 4, 5]) + del reshape_288 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x2x7x2x7x512xf32, 4xi64) + reshape_113 = paddle._C_ops.reshape(transpose_78, full_int_array_41) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_106 = [slice_120, full_56, full_44] + del slice_120 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_104 = paddle._C_ops.stack(combine_106, 0) + del combine_106 + + # pd_op.reshape: (-1x196x512xf32) <- (-1x14x14x512xf32, 3xi64) + reshape_114 = paddle._C_ops.reshape(reshape_113, stack_104) + del stack_104 + + # pd_op.full: (xf32) <- () + full_13 = paddle._C_ops.full( + [], + float("0.73913"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_144 = full_13 + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_75 = paddle._C_ops.shape64(reshape_114) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_125 = paddle._C_ops.slice( + shape64_75, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_75 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_107 = [slice_125, full_40, full_40] + del slice_125 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_105 = paddle._C_ops.stack(combine_107, 0) + del combine_107 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_22 = paddle._C_ops.uniform( + stack_105, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_105 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_199 = paddle._C_ops.add(full_13, uniform_22) + del uniform_22 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_22 = paddle._C_ops.floor(add_199) + del add_199 + + # pd_op.divide: (-1x196x512xf32) <- (-1x196x512xf32, xf32) + divide_22 = paddle._C_ops.divide(reshape_114, full_13) + + # pd_op.multiply: (-1x196x512xf32) <- (-1x196x512xf32, -1x1x1xf32) + multiply_22 = paddle._C_ops.multiply(divide_22, floor_22) + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_87 = paddle._C_ops.add(add_84, multiply_22) + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_84, layer_norm_85, layer_norm_86 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_87, parameter_144, parameter_143, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_143, parameter_144 + + # pd_op.matmul: (-1x196x2048xf32) <- (-1x196x512xf32, 512x2048xf32) + matmul_65 = paddle._C_ops.matmul(layer_norm_84, parameter_142, False, False) + del parameter_142 + + # pd_op.add: (-1x196x2048xf32) <- (-1x196x2048xf32, 2048xf32) + add_88 = paddle._C_ops.add(matmul_65, parameter_141) + del parameter_141 + + # pd_op.gelu: (-1x196x2048xf32) <- (-1x196x2048xf32) + gelu_12 = paddle._C_ops.gelu(add_88, False) + + # pd_op.matmul: (-1x196x512xf32) <- (-1x196x2048xf32, 2048x512xf32) + matmul_66 = paddle._C_ops.matmul(gelu_12, parameter_140, False, False) + del parameter_140 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, 512xf32) + add_89 = paddle._C_ops.add(matmul_66, parameter_139) + del parameter_139 + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_76 = paddle._C_ops.shape64(add_89) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_126 = paddle._C_ops.slice( + shape64_76, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_76 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_108 = [slice_126, full_40, full_40] + del slice_126 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_106 = paddle._C_ops.stack(combine_108, 0) + del combine_108 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_23 = paddle._C_ops.uniform( + stack_106, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_106 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_200 = paddle._C_ops.add(full_13, uniform_23) + del uniform_23 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_23 = paddle._C_ops.floor(add_200) + del add_200 + + # pd_op.divide: (-1x196x512xf32) <- (-1x196x512xf32, xf32) + divide_23 = paddle._C_ops.divide(add_89, full_13) + + # pd_op.multiply: (-1x196x512xf32) <- (-1x196x512xf32, -1x1x1xf32) + multiply_23 = paddle._C_ops.multiply(divide_23, floor_23) + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_90 = paddle._C_ops.add(add_87, multiply_23) + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_77 = paddle._C_ops.shape64(add_90) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_127 = paddle._C_ops.slice( + shape64_77, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_77 + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_87, layer_norm_88, layer_norm_89 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_90, parameter_138, parameter_137, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_137, parameter_138 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_109 = [slice_127, full_54, full_54, full_44] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_107 = paddle._C_ops.stack(combine_109, 0) + del combine_109 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x196x512xf32, 4xi64) + reshape_115 = paddle._C_ops.reshape(layer_norm_87, stack_107) + del stack_107 + + # pd_op.shape64: (4xi64) <- (-1x14x14x512xf32) + shape64_78 = paddle._C_ops.shape64(reshape_115) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_128 = paddle._C_ops.slice( + shape64_78, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_78 + + # pd_op.roll: (-1x14x14x512xf32) <- (-1x14x14x512xf32, 2xi64) + roll_12 = paddle._C_ops.roll(reshape_115, full_int_array_2, [1, 2]) + + # pd_op.shape64: (4xi64) <- (-1x14x14x512xf32) + shape64_79 = paddle._C_ops.shape64(roll_12) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_129 = paddle._C_ops.slice( + shape64_79, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_79 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_110 = [slice_129, full_55, full_28, full_55, full_28, full_44] + del slice_129 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_108 = paddle._C_ops.stack(combine_110, 0) + del combine_110 + + # pd_op.reshape: (-1x2x7x2x7x512xf32) <- (-1x14x14x512xf32, 6xi64) + reshape_289 = paddle._C_ops.reshape(roll_12, stack_108) + del stack_108 + + # pd_op.transpose: (-1x2x2x7x7x512xf32) <- (-1x2x7x2x7x512xf32) + transpose_79 = paddle._C_ops.transpose(reshape_289, [0, 1, 3, 2, 4, 5]) + del reshape_289 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x2x2x7x7x512xf32, 4xi64) + reshape_116 = paddle._C_ops.reshape(transpose_79, full_int_array_38) + + # pd_op.reshape: (-1x49x512xf32) <- (-1x7x7x512xf32, 3xi64) + reshape_117 = paddle._C_ops.reshape(reshape_116, full_int_array_39) + + # pd_op.full: (1x14x14x1xf32) <- () + full_64 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__60 = paddle._C_ops.set_value_( + full_64, + full_int_array_16, + full_int_array_17, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_64 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__61 = paddle._C_ops.set_value_( + set_value__60, + full_int_array_19, + full_int_array_20, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__60 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__62 = paddle._C_ops.set_value_( + set_value__61, + full_int_array_21, + full_int_array_22, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__61 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__63 = paddle._C_ops.set_value_( + set_value__62, + full_int_array_23, + full_int_array_24, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__62 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__64 = paddle._C_ops.set_value_( + set_value__63, + full_int_array_17, + full_int_array_2, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__63 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__65 = paddle._C_ops.set_value_( + set_value__64, + full_int_array_20, + full_int_array_25, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__64 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__66 = paddle._C_ops.set_value_( + set_value__65, + full_int_array_26, + full_int_array_27, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__65 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__67 = paddle._C_ops.set_value_( + set_value__66, + full_int_array_24, + full_int_array_28, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__66 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__6 = paddle._C_ops.set_value_( + set_value__67, + full_int_array_2, + full_int_array_29, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__67 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_290 = paddle._C_ops.reshape(set_value__6, full_int_array_42) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_152 = paddle._C_ops.transpose(reshape_290, [0, 1, 3, 2, 4, 5]) + del reshape_290 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_291 = paddle._C_ops.reshape(transpose_152, full_int_array_31) + del transpose_152 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_292 = paddle._C_ops.reshape(reshape_291, full_int_array_32) + del reshape_291 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_55 = paddle._C_ops.unsqueeze(reshape_292, full_int_array_8) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_56 = paddle._C_ops.unsqueeze(reshape_292, full_int_array_0) + del reshape_292 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_6 = paddle._C_ops.subtract(unsqueeze_55, unsqueeze_56) + del unsqueeze_55, unsqueeze_56 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_6 = paddle._C_ops.not_equal(subtract_6, full_35) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_12 = paddle._C_ops.where(not_equal_6, full_58, subtract_6) + del not_equal_6, subtract_6 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_6 = paddle._C_ops.equal(where_12, full_35) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_13 = paddle._C_ops.where(equal_6, full_59, where_12) + del equal_6, where_12 + + # pd_op.shape64: (3xi64) <- (-1x49x512xf32) + shape64_80 = paddle._C_ops.shape64(reshape_117) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_130 = paddle._C_ops.slice( + shape64_80, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_80 + + # pd_op.matmul: (-1x49x1536xf32) <- (-1x49x512xf32, 512x1536xf32) + matmul_67 = paddle._C_ops.matmul(reshape_117, parameter_136, False, False) + del parameter_136 + + # pd_op.add: (-1x49x1536xf32) <- (-1x49x1536xf32, 1536xf32) + add_91 = paddle._C_ops.add(matmul_67, parameter_135) + del parameter_135 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_111 = [slice_130, full_29, full_30, full_52, full_32] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_109 = paddle._C_ops.stack(combine_111, 0) + del combine_111 + + # pd_op.reshape: (-1x49x3x16x32xf32) <- (-1x49x1536xf32, 5xi64) + reshape_293 = paddle._C_ops.reshape(add_91, stack_109) + del stack_109 + + # pd_op.transpose: (3x-1x16x49x32xf32) <- (-1x49x3x16x32xf32) + transpose_80 = paddle._C_ops.transpose(reshape_293, [2, 0, 3, 1, 4]) + del reshape_293 + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_131 = paddle._C_ops.slice( + transpose_80, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_132 = paddle._C_ops.slice( + transpose_80, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_13 = paddle._C_ops.slice( + transpose_80, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x16x49x32xf32) <- (-1x16x49x32xf32, 1xf32) + scale_13 = paddle._C_ops.scale(slice_131, full_0, float("0"), True) + del slice_131 + + # pd_op.transpose: (-1x16x32x49xf32) <- (-1x16x49x32xf32) + transpose_81 = paddle._C_ops.transpose(slice_132, [0, 1, 3, 2]) + del slice_132 + + # pd_op.matmul: (-1x16x49x49xf32) <- (-1x16x49x32xf32, -1x16x32x49xf32) + matmul_68 = paddle._C_ops.matmul(scale_13, transpose_81, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_118 = paddle._C_ops.reshape(data_27, full_int_array_12) + del data_27 + + # pd_op.index_select: (2401x16xf32) <- (169x16xf32, 2401xi64) + index_select_13 = paddle._C_ops.index_select(data_28, reshape_118, 0) + del data_28 + + # pd_op.reshape: (49x49x16xf32) <- (2401x16xf32, 3xi64) + reshape_294 = paddle._C_ops.reshape(index_select_13, full_int_array_13) + + # pd_op.transpose: (16x49x49xf32) <- (49x49x16xf32) + transpose_82 = paddle._C_ops.transpose(reshape_294, [2, 0, 1]) + del reshape_294 + + # pd_op.unsqueeze: (1x16x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_19 = paddle._C_ops.unsqueeze(transpose_82, full_int_array_7) + + # pd_op.add: (-1x16x49x49xf32) <- (-1x16x49x49xf32, 1x16x49x49xf32) + add_92 = paddle._C_ops.add(matmul_68, unsqueeze_19) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_6 = paddle._C_ops.floor_divide(slice_130, full_60) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_112 = [floor_divide_6, full_31, full_52, full_29, full_29] + del floor_divide_6 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_110 = paddle._C_ops.stack(combine_112, 0) + del combine_112 + + # pd_op.reshape: (-1x4x16x49x49xf32) <- (-1x16x49x49xf32, 5xi64) + reshape_119 = paddle._C_ops.reshape(add_92, stack_110) + del stack_110 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_57 = paddle._C_ops.unsqueeze(where_13, full_int_array_8) + del where_13 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_20 = paddle._C_ops.unsqueeze(unsqueeze_57, full_int_array_7) + del unsqueeze_57 + + # pd_op.add: (-1x4x16x49x49xf32) <- (-1x4x16x49x49xf32, 1x4x1x49x49xf32) + add_93 = paddle._C_ops.add(reshape_119, unsqueeze_20) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_113 = [slice_130, full_52, full_29, full_29] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_111 = paddle._C_ops.stack(combine_113, 0) + del combine_113 + + # pd_op.reshape: (-1x16x49x49xf32) <- (-1x4x16x49x49xf32, 4xi64) + reshape_295 = paddle._C_ops.reshape(add_93, stack_111) + del stack_111 + + # pd_op.softmax: (-1x16x49x49xf32) <- (-1x16x49x49xf32) + softmax_13 = paddle._C_ops.softmax(reshape_295, -1) + del reshape_295 + + # pd_op.matmul: (-1x16x49x32xf32) <- (-1x16x49x49xf32, -1x16x49x32xf32) + matmul_137 = paddle._C_ops.matmul(softmax_13, slice_13, False, False) + + # pd_op.transpose: (-1x49x16x32xf32) <- (-1x16x49x32xf32) + transpose_83 = paddle._C_ops.transpose(matmul_137, [0, 2, 1, 3]) + del matmul_137 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_114 = [slice_130, full_29, full_44] + del slice_130 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_112 = paddle._C_ops.stack(combine_114, 0) + del combine_114 + + # pd_op.reshape: (-1x49x512xf32) <- (-1x49x16x32xf32, 3xi64) + reshape_120 = paddle._C_ops.reshape(transpose_83, stack_112) + del stack_112 + + # pd_op.matmul: (-1x49x512xf32) <- (-1x49x512xf32, 512x512xf32) + matmul_69 = paddle._C_ops.matmul(reshape_120, parameter_134, False, False) + del parameter_134 + + # pd_op.add: (-1x49x512xf32) <- (-1x49x512xf32, 512xf32) + add_94 = paddle._C_ops.add(matmul_69, parameter_133) + del parameter_133 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x49x512xf32, 4xi64) + reshape_121 = paddle._C_ops.reshape(add_94, full_int_array_38) + + # pd_op.reshape: (-1x2x2x7x7x512xf32) <- (-1x7x7x512xf32, 6xi64) + reshape_296 = paddle._C_ops.reshape(reshape_121, full_int_array_40) + + # pd_op.transpose: (-1x2x7x2x7x512xf32) <- (-1x2x2x7x7x512xf32) + transpose_84 = paddle._C_ops.transpose(reshape_296, [0, 1, 3, 2, 4, 5]) + del reshape_296 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x2x7x2x7x512xf32, 4xi64) + reshape_122 = paddle._C_ops.reshape(transpose_84, full_int_array_41) + + # pd_op.roll: (-1x14x14x512xf32) <- (-1x14x14x512xf32, 2xi64) + roll_13 = paddle._C_ops.roll(reshape_122, full_int_array_3, [1, 2]) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_115 = [slice_127, full_56, full_44] + del slice_127 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_113 = paddle._C_ops.stack(combine_115, 0) + del combine_115 + + # pd_op.reshape: (-1x196x512xf32) <- (-1x14x14x512xf32, 3xi64) + reshape_123 = paddle._C_ops.reshape(roll_13, stack_113) + del stack_113 + + # pd_op.full: (xf32) <- () + full_14 = paddle._C_ops.full( + [], + float("0.717391"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_155 = full_14 + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_81 = paddle._C_ops.shape64(reshape_123) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_133 = paddle._C_ops.slice( + shape64_81, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_81 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_116 = [slice_133, full_40, full_40] + del slice_133 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_114 = paddle._C_ops.stack(combine_116, 0) + del combine_116 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_24 = paddle._C_ops.uniform( + stack_114, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_114 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_201 = paddle._C_ops.add(full_14, uniform_24) + del uniform_24 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_24 = paddle._C_ops.floor(add_201) + del add_201 + + # pd_op.divide: (-1x196x512xf32) <- (-1x196x512xf32, xf32) + divide_24 = paddle._C_ops.divide(reshape_123, full_14) + + # pd_op.multiply: (-1x196x512xf32) <- (-1x196x512xf32, -1x1x1xf32) + multiply_24 = paddle._C_ops.multiply(divide_24, floor_24) + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_95 = paddle._C_ops.add(add_90, multiply_24) + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_90, layer_norm_91, layer_norm_92 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_95, parameter_132, parameter_131, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_131, parameter_132 + + # pd_op.matmul: (-1x196x2048xf32) <- (-1x196x512xf32, 512x2048xf32) + matmul_70 = paddle._C_ops.matmul(layer_norm_90, parameter_130, False, False) + del parameter_130 + + # pd_op.add: (-1x196x2048xf32) <- (-1x196x2048xf32, 2048xf32) + add_96 = paddle._C_ops.add(matmul_70, parameter_129) + del parameter_129 + + # pd_op.gelu: (-1x196x2048xf32) <- (-1x196x2048xf32) + gelu_13 = paddle._C_ops.gelu(add_96, False) + + # pd_op.matmul: (-1x196x512xf32) <- (-1x196x2048xf32, 2048x512xf32) + matmul_71 = paddle._C_ops.matmul(gelu_13, parameter_128, False, False) + del parameter_128 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, 512xf32) + add_97 = paddle._C_ops.add(matmul_71, parameter_127) + del parameter_127 + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_82 = paddle._C_ops.shape64(add_97) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_134 = paddle._C_ops.slice( + shape64_82, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_82 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_117 = [slice_134, full_40, full_40] + del slice_134 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_115 = paddle._C_ops.stack(combine_117, 0) + del combine_117 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_25 = paddle._C_ops.uniform( + stack_115, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_115 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_202 = paddle._C_ops.add(full_14, uniform_25) + del uniform_25 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_25 = paddle._C_ops.floor(add_202) + del add_202 + + # pd_op.divide: (-1x196x512xf32) <- (-1x196x512xf32, xf32) + divide_25 = paddle._C_ops.divide(add_97, full_14) + + # pd_op.multiply: (-1x196x512xf32) <- (-1x196x512xf32, -1x1x1xf32) + multiply_25 = paddle._C_ops.multiply(divide_25, floor_25) + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_98 = paddle._C_ops.add(add_95, multiply_25) + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_83 = paddle._C_ops.shape64(add_98) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_135 = paddle._C_ops.slice( + shape64_83, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_83 + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_93, layer_norm_94, layer_norm_95 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_98, parameter_126, parameter_125, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_125, parameter_126 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_118 = [slice_135, full_54, full_54, full_44] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_116 = paddle._C_ops.stack(combine_118, 0) + del combine_118 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x196x512xf32, 4xi64) + reshape_124 = paddle._C_ops.reshape(layer_norm_93, stack_116) + del stack_116 + + # pd_op.shape64: (4xi64) <- (-1x14x14x512xf32) + shape64_84 = paddle._C_ops.shape64(reshape_124) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_136 = paddle._C_ops.slice( + shape64_84, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_84 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_119 = [slice_136, full_55, full_28, full_55, full_28, full_44] + del slice_136 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_117 = paddle._C_ops.stack(combine_119, 0) + del combine_119 + + # pd_op.reshape: (-1x2x7x2x7x512xf32) <- (-1x14x14x512xf32, 6xi64) + reshape_297 = paddle._C_ops.reshape(reshape_124, stack_117) + del stack_117 + + # pd_op.transpose: (-1x2x2x7x7x512xf32) <- (-1x2x7x2x7x512xf32) + transpose_85 = paddle._C_ops.transpose(reshape_297, [0, 1, 3, 2, 4, 5]) + del reshape_297 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x2x2x7x7x512xf32, 4xi64) + reshape_125 = paddle._C_ops.reshape(transpose_85, full_int_array_38) + + # pd_op.reshape: (-1x49x512xf32) <- (-1x7x7x512xf32, 3xi64) + reshape_126 = paddle._C_ops.reshape(reshape_125, full_int_array_39) + + # pd_op.shape64: (3xi64) <- (-1x49x512xf32) + shape64_85 = paddle._C_ops.shape64(reshape_126) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_137 = paddle._C_ops.slice( + shape64_85, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_85 + + # pd_op.matmul: (-1x49x1536xf32) <- (-1x49x512xf32, 512x1536xf32) + matmul_72 = paddle._C_ops.matmul(reshape_126, parameter_124, False, False) + del parameter_124 + + # pd_op.add: (-1x49x1536xf32) <- (-1x49x1536xf32, 1536xf32) + add_99 = paddle._C_ops.add(matmul_72, parameter_123) + del parameter_123 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_120 = [slice_137, full_29, full_30, full_52, full_32] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_118 = paddle._C_ops.stack(combine_120, 0) + del combine_120 + + # pd_op.reshape: (-1x49x3x16x32xf32) <- (-1x49x1536xf32, 5xi64) + reshape_298 = paddle._C_ops.reshape(add_99, stack_118) + del stack_118 + + # pd_op.transpose: (3x-1x16x49x32xf32) <- (-1x49x3x16x32xf32) + transpose_86 = paddle._C_ops.transpose(reshape_298, [2, 0, 3, 1, 4]) + del reshape_298 + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_138 = paddle._C_ops.slice( + transpose_86, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_139 = paddle._C_ops.slice( + transpose_86, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_14 = paddle._C_ops.slice( + transpose_86, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x16x49x32xf32) <- (-1x16x49x32xf32, 1xf32) + scale_14 = paddle._C_ops.scale(slice_138, full_0, float("0"), True) + del slice_138 + + # pd_op.transpose: (-1x16x32x49xf32) <- (-1x16x49x32xf32) + transpose_87 = paddle._C_ops.transpose(slice_139, [0, 1, 3, 2]) + del slice_139 + + # pd_op.matmul: (-1x16x49x49xf32) <- (-1x16x49x32xf32, -1x16x32x49xf32) + matmul_73 = paddle._C_ops.matmul(scale_14, transpose_87, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_127 = paddle._C_ops.reshape(data_29, full_int_array_12) + del data_29 + + # pd_op.index_select: (2401x16xf32) <- (169x16xf32, 2401xi64) + index_select_14 = paddle._C_ops.index_select(data_30, reshape_127, 0) + del data_30 + + # pd_op.reshape: (49x49x16xf32) <- (2401x16xf32, 3xi64) + reshape_299 = paddle._C_ops.reshape(index_select_14, full_int_array_13) + + # pd_op.transpose: (16x49x49xf32) <- (49x49x16xf32) + transpose_88 = paddle._C_ops.transpose(reshape_299, [2, 0, 1]) + del reshape_299 + + # pd_op.unsqueeze: (1x16x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_21 = paddle._C_ops.unsqueeze(transpose_88, full_int_array_7) + + # pd_op.add: (-1x16x49x49xf32) <- (-1x16x49x49xf32, 1x16x49x49xf32) + add_203 = paddle._C_ops.add(matmul_73, unsqueeze_21) + + # pd_op.softmax: (-1x16x49x49xf32) <- (-1x16x49x49xf32) + softmax_14 = paddle._C_ops.softmax(add_203, -1) + del add_203 + + # pd_op.matmul: (-1x16x49x32xf32) <- (-1x16x49x49xf32, -1x16x49x32xf32) + matmul_138 = paddle._C_ops.matmul(softmax_14, slice_14, False, False) + + # pd_op.transpose: (-1x49x16x32xf32) <- (-1x16x49x32xf32) + transpose_89 = paddle._C_ops.transpose(matmul_138, [0, 2, 1, 3]) + del matmul_138 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_121 = [slice_137, full_29, full_44] + del slice_137 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_119 = paddle._C_ops.stack(combine_121, 0) + del combine_121 + + # pd_op.reshape: (-1x49x512xf32) <- (-1x49x16x32xf32, 3xi64) + reshape_128 = paddle._C_ops.reshape(transpose_89, stack_119) + del stack_119 + + # pd_op.matmul: (-1x49x512xf32) <- (-1x49x512xf32, 512x512xf32) + matmul_74 = paddle._C_ops.matmul(reshape_128, parameter_122, False, False) + del parameter_122 + + # pd_op.add: (-1x49x512xf32) <- (-1x49x512xf32, 512xf32) + add_100 = paddle._C_ops.add(matmul_74, parameter_121) + del parameter_121 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x49x512xf32, 4xi64) + reshape_129 = paddle._C_ops.reshape(add_100, full_int_array_38) + + # pd_op.reshape: (-1x2x2x7x7x512xf32) <- (-1x7x7x512xf32, 6xi64) + reshape_300 = paddle._C_ops.reshape(reshape_129, full_int_array_40) + + # pd_op.transpose: (-1x2x7x2x7x512xf32) <- (-1x2x2x7x7x512xf32) + transpose_90 = paddle._C_ops.transpose(reshape_300, [0, 1, 3, 2, 4, 5]) + del reshape_300 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x2x7x2x7x512xf32, 4xi64) + reshape_130 = paddle._C_ops.reshape(transpose_90, full_int_array_41) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_122 = [slice_135, full_56, full_44] + del slice_135 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_120 = paddle._C_ops.stack(combine_122, 0) + del combine_122 + + # pd_op.reshape: (-1x196x512xf32) <- (-1x14x14x512xf32, 3xi64) + reshape_131 = paddle._C_ops.reshape(reshape_130, stack_120) + del stack_120 + + # pd_op.full: (xf32) <- () + full_15 = paddle._C_ops.full( + [], + float("0.695652"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_164 = full_15 + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_86 = paddle._C_ops.shape64(reshape_131) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_140 = paddle._C_ops.slice( + shape64_86, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_86 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_123 = [slice_140, full_40, full_40] + del slice_140 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_121 = paddle._C_ops.stack(combine_123, 0) + del combine_123 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_26 = paddle._C_ops.uniform( + stack_121, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_121 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_204 = paddle._C_ops.add(full_15, uniform_26) + del uniform_26 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_26 = paddle._C_ops.floor(add_204) + del add_204 + + # pd_op.divide: (-1x196x512xf32) <- (-1x196x512xf32, xf32) + divide_26 = paddle._C_ops.divide(reshape_131, full_15) + + # pd_op.multiply: (-1x196x512xf32) <- (-1x196x512xf32, -1x1x1xf32) + multiply_26 = paddle._C_ops.multiply(divide_26, floor_26) + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_101 = paddle._C_ops.add(add_98, multiply_26) + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_96, layer_norm_97, layer_norm_98 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_101, parameter_120, parameter_119, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_119, parameter_120 + + # pd_op.matmul: (-1x196x2048xf32) <- (-1x196x512xf32, 512x2048xf32) + matmul_75 = paddle._C_ops.matmul(layer_norm_96, parameter_118, False, False) + del parameter_118 + + # pd_op.add: (-1x196x2048xf32) <- (-1x196x2048xf32, 2048xf32) + add_102 = paddle._C_ops.add(matmul_75, parameter_117) + del parameter_117 + + # pd_op.gelu: (-1x196x2048xf32) <- (-1x196x2048xf32) + gelu_14 = paddle._C_ops.gelu(add_102, False) + + # pd_op.matmul: (-1x196x512xf32) <- (-1x196x2048xf32, 2048x512xf32) + matmul_76 = paddle._C_ops.matmul(gelu_14, parameter_116, False, False) + del parameter_116 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, 512xf32) + add_103 = paddle._C_ops.add(matmul_76, parameter_115) + del parameter_115 + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_87 = paddle._C_ops.shape64(add_103) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_141 = paddle._C_ops.slice( + shape64_87, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_87 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_124 = [slice_141, full_40, full_40] + del slice_141 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_122 = paddle._C_ops.stack(combine_124, 0) + del combine_124 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_27 = paddle._C_ops.uniform( + stack_122, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_122 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_205 = paddle._C_ops.add(full_15, uniform_27) + del uniform_27 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_27 = paddle._C_ops.floor(add_205) + del add_205 + + # pd_op.divide: (-1x196x512xf32) <- (-1x196x512xf32, xf32) + divide_27 = paddle._C_ops.divide(add_103, full_15) + + # pd_op.multiply: (-1x196x512xf32) <- (-1x196x512xf32, -1x1x1xf32) + multiply_27 = paddle._C_ops.multiply(divide_27, floor_27) + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_104 = paddle._C_ops.add(add_101, multiply_27) + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_88 = paddle._C_ops.shape64(add_104) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_142 = paddle._C_ops.slice( + shape64_88, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_88 + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_99, layer_norm_100, layer_norm_101 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_104, parameter_114, parameter_113, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_113, parameter_114 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_125 = [slice_142, full_54, full_54, full_44] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_123 = paddle._C_ops.stack(combine_125, 0) + del combine_125 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x196x512xf32, 4xi64) + reshape_132 = paddle._C_ops.reshape(layer_norm_99, stack_123) + del stack_123 + + # pd_op.shape64: (4xi64) <- (-1x14x14x512xf32) + shape64_89 = paddle._C_ops.shape64(reshape_132) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_143 = paddle._C_ops.slice( + shape64_89, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_89 + + # pd_op.roll: (-1x14x14x512xf32) <- (-1x14x14x512xf32, 2xi64) + roll_14 = paddle._C_ops.roll(reshape_132, full_int_array_2, [1, 2]) + + # pd_op.shape64: (4xi64) <- (-1x14x14x512xf32) + shape64_90 = paddle._C_ops.shape64(roll_14) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_144 = paddle._C_ops.slice( + shape64_90, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_90 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_126 = [slice_144, full_55, full_28, full_55, full_28, full_44] + del slice_144 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_124 = paddle._C_ops.stack(combine_126, 0) + del combine_126 + + # pd_op.reshape: (-1x2x7x2x7x512xf32) <- (-1x14x14x512xf32, 6xi64) + reshape_301 = paddle._C_ops.reshape(roll_14, stack_124) + del stack_124 + + # pd_op.transpose: (-1x2x2x7x7x512xf32) <- (-1x2x7x2x7x512xf32) + transpose_91 = paddle._C_ops.transpose(reshape_301, [0, 1, 3, 2, 4, 5]) + del reshape_301 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x2x2x7x7x512xf32, 4xi64) + reshape_133 = paddle._C_ops.reshape(transpose_91, full_int_array_38) + + # pd_op.reshape: (-1x49x512xf32) <- (-1x7x7x512xf32, 3xi64) + reshape_134 = paddle._C_ops.reshape(reshape_133, full_int_array_39) + + # pd_op.full: (1x14x14x1xf32) <- () + full_65 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__68 = paddle._C_ops.set_value_( + full_65, + full_int_array_16, + full_int_array_17, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_65 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__69 = paddle._C_ops.set_value_( + set_value__68, + full_int_array_19, + full_int_array_20, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__68 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__70 = paddle._C_ops.set_value_( + set_value__69, + full_int_array_21, + full_int_array_22, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__69 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__71 = paddle._C_ops.set_value_( + set_value__70, + full_int_array_23, + full_int_array_24, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__70 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__72 = paddle._C_ops.set_value_( + set_value__71, + full_int_array_17, + full_int_array_2, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__71 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__73 = paddle._C_ops.set_value_( + set_value__72, + full_int_array_20, + full_int_array_25, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__72 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__74 = paddle._C_ops.set_value_( + set_value__73, + full_int_array_26, + full_int_array_27, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__73 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__75 = paddle._C_ops.set_value_( + set_value__74, + full_int_array_24, + full_int_array_28, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__74 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__7 = paddle._C_ops.set_value_( + set_value__75, + full_int_array_2, + full_int_array_29, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__75 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_302 = paddle._C_ops.reshape(set_value__7, full_int_array_42) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_153 = paddle._C_ops.transpose(reshape_302, [0, 1, 3, 2, 4, 5]) + del reshape_302 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_303 = paddle._C_ops.reshape(transpose_153, full_int_array_31) + del transpose_153 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_304 = paddle._C_ops.reshape(reshape_303, full_int_array_32) + del reshape_303 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_58 = paddle._C_ops.unsqueeze(reshape_304, full_int_array_8) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_59 = paddle._C_ops.unsqueeze(reshape_304, full_int_array_0) + del reshape_304 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_7 = paddle._C_ops.subtract(unsqueeze_58, unsqueeze_59) + del unsqueeze_58, unsqueeze_59 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_7 = paddle._C_ops.not_equal(subtract_7, full_35) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_14 = paddle._C_ops.where(not_equal_7, full_58, subtract_7) + del not_equal_7, subtract_7 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_7 = paddle._C_ops.equal(where_14, full_35) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_15 = paddle._C_ops.where(equal_7, full_59, where_14) + del equal_7, where_14 + + # pd_op.shape64: (3xi64) <- (-1x49x512xf32) + shape64_91 = paddle._C_ops.shape64(reshape_134) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_145 = paddle._C_ops.slice( + shape64_91, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_91 + + # pd_op.matmul: (-1x49x1536xf32) <- (-1x49x512xf32, 512x1536xf32) + matmul_77 = paddle._C_ops.matmul(reshape_134, parameter_112, False, False) + del parameter_112 + + # pd_op.add: (-1x49x1536xf32) <- (-1x49x1536xf32, 1536xf32) + add_105 = paddle._C_ops.add(matmul_77, parameter_111) + del parameter_111 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_127 = [slice_145, full_29, full_30, full_52, full_32] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_125 = paddle._C_ops.stack(combine_127, 0) + del combine_127 + + # pd_op.reshape: (-1x49x3x16x32xf32) <- (-1x49x1536xf32, 5xi64) + reshape_305 = paddle._C_ops.reshape(add_105, stack_125) + del stack_125 + + # pd_op.transpose: (3x-1x16x49x32xf32) <- (-1x49x3x16x32xf32) + transpose_92 = paddle._C_ops.transpose(reshape_305, [2, 0, 3, 1, 4]) + del reshape_305 + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_146 = paddle._C_ops.slice( + transpose_92, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_147 = paddle._C_ops.slice( + transpose_92, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_15 = paddle._C_ops.slice( + transpose_92, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x16x49x32xf32) <- (-1x16x49x32xf32, 1xf32) + scale_15 = paddle._C_ops.scale(slice_146, full_0, float("0"), True) + del slice_146 + + # pd_op.transpose: (-1x16x32x49xf32) <- (-1x16x49x32xf32) + transpose_93 = paddle._C_ops.transpose(slice_147, [0, 1, 3, 2]) + del slice_147 + + # pd_op.matmul: (-1x16x49x49xf32) <- (-1x16x49x32xf32, -1x16x32x49xf32) + matmul_78 = paddle._C_ops.matmul(scale_15, transpose_93, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_135 = paddle._C_ops.reshape(data_31, full_int_array_12) + del data_31 + + # pd_op.index_select: (2401x16xf32) <- (169x16xf32, 2401xi64) + index_select_15 = paddle._C_ops.index_select(data_32, reshape_135, 0) + del data_32 + + # pd_op.reshape: (49x49x16xf32) <- (2401x16xf32, 3xi64) + reshape_306 = paddle._C_ops.reshape(index_select_15, full_int_array_13) + + # pd_op.transpose: (16x49x49xf32) <- (49x49x16xf32) + transpose_94 = paddle._C_ops.transpose(reshape_306, [2, 0, 1]) + del reshape_306 + + # pd_op.unsqueeze: (1x16x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_22 = paddle._C_ops.unsqueeze(transpose_94, full_int_array_7) + + # pd_op.add: (-1x16x49x49xf32) <- (-1x16x49x49xf32, 1x16x49x49xf32) + add_106 = paddle._C_ops.add(matmul_78, unsqueeze_22) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_7 = paddle._C_ops.floor_divide(slice_145, full_60) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_128 = [floor_divide_7, full_31, full_52, full_29, full_29] + del floor_divide_7 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_126 = paddle._C_ops.stack(combine_128, 0) + del combine_128 + + # pd_op.reshape: (-1x4x16x49x49xf32) <- (-1x16x49x49xf32, 5xi64) + reshape_136 = paddle._C_ops.reshape(add_106, stack_126) + del stack_126 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_60 = paddle._C_ops.unsqueeze(where_15, full_int_array_8) + del where_15 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_23 = paddle._C_ops.unsqueeze(unsqueeze_60, full_int_array_7) + del unsqueeze_60 + + # pd_op.add: (-1x4x16x49x49xf32) <- (-1x4x16x49x49xf32, 1x4x1x49x49xf32) + add_107 = paddle._C_ops.add(reshape_136, unsqueeze_23) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_129 = [slice_145, full_52, full_29, full_29] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_127 = paddle._C_ops.stack(combine_129, 0) + del combine_129 + + # pd_op.reshape: (-1x16x49x49xf32) <- (-1x4x16x49x49xf32, 4xi64) + reshape_307 = paddle._C_ops.reshape(add_107, stack_127) + del stack_127 + + # pd_op.softmax: (-1x16x49x49xf32) <- (-1x16x49x49xf32) + softmax_15 = paddle._C_ops.softmax(reshape_307, -1) + del reshape_307 + + # pd_op.matmul: (-1x16x49x32xf32) <- (-1x16x49x49xf32, -1x16x49x32xf32) + matmul_139 = paddle._C_ops.matmul(softmax_15, slice_15, False, False) + + # pd_op.transpose: (-1x49x16x32xf32) <- (-1x16x49x32xf32) + transpose_95 = paddle._C_ops.transpose(matmul_139, [0, 2, 1, 3]) + del matmul_139 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_130 = [slice_145, full_29, full_44] + del slice_145 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_128 = paddle._C_ops.stack(combine_130, 0) + del combine_130 + + # pd_op.reshape: (-1x49x512xf32) <- (-1x49x16x32xf32, 3xi64) + reshape_137 = paddle._C_ops.reshape(transpose_95, stack_128) + del stack_128 + + # pd_op.matmul: (-1x49x512xf32) <- (-1x49x512xf32, 512x512xf32) + matmul_79 = paddle._C_ops.matmul(reshape_137, parameter_110, False, False) + del parameter_110 + + # pd_op.add: (-1x49x512xf32) <- (-1x49x512xf32, 512xf32) + add_108 = paddle._C_ops.add(matmul_79, parameter_109) + del parameter_109 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x49x512xf32, 4xi64) + reshape_138 = paddle._C_ops.reshape(add_108, full_int_array_38) + + # pd_op.reshape: (-1x2x2x7x7x512xf32) <- (-1x7x7x512xf32, 6xi64) + reshape_308 = paddle._C_ops.reshape(reshape_138, full_int_array_40) + + # pd_op.transpose: (-1x2x7x2x7x512xf32) <- (-1x2x2x7x7x512xf32) + transpose_96 = paddle._C_ops.transpose(reshape_308, [0, 1, 3, 2, 4, 5]) + del reshape_308 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x2x7x2x7x512xf32, 4xi64) + reshape_139 = paddle._C_ops.reshape(transpose_96, full_int_array_41) + + # pd_op.roll: (-1x14x14x512xf32) <- (-1x14x14x512xf32, 2xi64) + roll_15 = paddle._C_ops.roll(reshape_139, full_int_array_3, [1, 2]) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_131 = [slice_142, full_56, full_44] + del slice_142 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_129 = paddle._C_ops.stack(combine_131, 0) + del combine_131 + + # pd_op.reshape: (-1x196x512xf32) <- (-1x14x14x512xf32, 3xi64) + reshape_140 = paddle._C_ops.reshape(roll_15, stack_129) + del stack_129 + + # pd_op.full: (xf32) <- () + full_16 = paddle._C_ops.full( + [], + float("0.673913"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_175 = full_16 + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_92 = paddle._C_ops.shape64(reshape_140) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_148 = paddle._C_ops.slice( + shape64_92, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_92 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_132 = [slice_148, full_40, full_40] + del slice_148 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_130 = paddle._C_ops.stack(combine_132, 0) + del combine_132 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_28 = paddle._C_ops.uniform( + stack_130, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_130 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_206 = paddle._C_ops.add(full_16, uniform_28) + del uniform_28 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_28 = paddle._C_ops.floor(add_206) + del add_206 + + # pd_op.divide: (-1x196x512xf32) <- (-1x196x512xf32, xf32) + divide_28 = paddle._C_ops.divide(reshape_140, full_16) + + # pd_op.multiply: (-1x196x512xf32) <- (-1x196x512xf32, -1x1x1xf32) + multiply_28 = paddle._C_ops.multiply(divide_28, floor_28) + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_109 = paddle._C_ops.add(add_104, multiply_28) + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_102, layer_norm_103, layer_norm_104 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_109, parameter_108, parameter_107, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_107, parameter_108 + + # pd_op.matmul: (-1x196x2048xf32) <- (-1x196x512xf32, 512x2048xf32) + matmul_80 = paddle._C_ops.matmul(layer_norm_102, parameter_106, False, False) + del parameter_106 + + # pd_op.add: (-1x196x2048xf32) <- (-1x196x2048xf32, 2048xf32) + add_110 = paddle._C_ops.add(matmul_80, parameter_105) + del parameter_105 + + # pd_op.gelu: (-1x196x2048xf32) <- (-1x196x2048xf32) + gelu_15 = paddle._C_ops.gelu(add_110, False) + + # pd_op.matmul: (-1x196x512xf32) <- (-1x196x2048xf32, 2048x512xf32) + matmul_81 = paddle._C_ops.matmul(gelu_15, parameter_104, False, False) + del parameter_104 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, 512xf32) + add_111 = paddle._C_ops.add(matmul_81, parameter_103) + del parameter_103 + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_93 = paddle._C_ops.shape64(add_111) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_149 = paddle._C_ops.slice( + shape64_93, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_93 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_133 = [slice_149, full_40, full_40] + del slice_149 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_131 = paddle._C_ops.stack(combine_133, 0) + del combine_133 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_29 = paddle._C_ops.uniform( + stack_131, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_131 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_207 = paddle._C_ops.add(full_16, uniform_29) + del uniform_29 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_29 = paddle._C_ops.floor(add_207) + del add_207 + + # pd_op.divide: (-1x196x512xf32) <- (-1x196x512xf32, xf32) + divide_29 = paddle._C_ops.divide(add_111, full_16) + + # pd_op.multiply: (-1x196x512xf32) <- (-1x196x512xf32, -1x1x1xf32) + multiply_29 = paddle._C_ops.multiply(divide_29, floor_29) + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_112 = paddle._C_ops.add(add_109, multiply_29) + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_94 = paddle._C_ops.shape64(add_112) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_150 = paddle._C_ops.slice( + shape64_94, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_94 + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_105, layer_norm_106, layer_norm_107 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_112, parameter_102, parameter_101, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_101, parameter_102 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_134 = [slice_150, full_54, full_54, full_44] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_132 = paddle._C_ops.stack(combine_134, 0) + del combine_134 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x196x512xf32, 4xi64) + reshape_141 = paddle._C_ops.reshape(layer_norm_105, stack_132) + del stack_132 + + # pd_op.shape64: (4xi64) <- (-1x14x14x512xf32) + shape64_95 = paddle._C_ops.shape64(reshape_141) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_151 = paddle._C_ops.slice( + shape64_95, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_95 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_135 = [slice_151, full_55, full_28, full_55, full_28, full_44] + del slice_151 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_133 = paddle._C_ops.stack(combine_135, 0) + del combine_135 + + # pd_op.reshape: (-1x2x7x2x7x512xf32) <- (-1x14x14x512xf32, 6xi64) + reshape_309 = paddle._C_ops.reshape(reshape_141, stack_133) + del stack_133 + + # pd_op.transpose: (-1x2x2x7x7x512xf32) <- (-1x2x7x2x7x512xf32) + transpose_97 = paddle._C_ops.transpose(reshape_309, [0, 1, 3, 2, 4, 5]) + del reshape_309 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x2x2x7x7x512xf32, 4xi64) + reshape_142 = paddle._C_ops.reshape(transpose_97, full_int_array_38) + + # pd_op.reshape: (-1x49x512xf32) <- (-1x7x7x512xf32, 3xi64) + reshape_143 = paddle._C_ops.reshape(reshape_142, full_int_array_39) + + # pd_op.shape64: (3xi64) <- (-1x49x512xf32) + shape64_96 = paddle._C_ops.shape64(reshape_143) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_152 = paddle._C_ops.slice( + shape64_96, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_96 + + # pd_op.matmul: (-1x49x1536xf32) <- (-1x49x512xf32, 512x1536xf32) + matmul_82 = paddle._C_ops.matmul(reshape_143, parameter_100, False, False) + del parameter_100 + + # pd_op.add: (-1x49x1536xf32) <- (-1x49x1536xf32, 1536xf32) + add_113 = paddle._C_ops.add(matmul_82, parameter_99) + del parameter_99 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_136 = [slice_152, full_29, full_30, full_52, full_32] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_134 = paddle._C_ops.stack(combine_136, 0) + del combine_136 + + # pd_op.reshape: (-1x49x3x16x32xf32) <- (-1x49x1536xf32, 5xi64) + reshape_310 = paddle._C_ops.reshape(add_113, stack_134) + del stack_134 + + # pd_op.transpose: (3x-1x16x49x32xf32) <- (-1x49x3x16x32xf32) + transpose_98 = paddle._C_ops.transpose(reshape_310, [2, 0, 3, 1, 4]) + del reshape_310 + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_153 = paddle._C_ops.slice( + transpose_98, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_154 = paddle._C_ops.slice( + transpose_98, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_16 = paddle._C_ops.slice( + transpose_98, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x16x49x32xf32) <- (-1x16x49x32xf32, 1xf32) + scale_16 = paddle._C_ops.scale(slice_153, full_0, float("0"), True) + del slice_153 + + # pd_op.transpose: (-1x16x32x49xf32) <- (-1x16x49x32xf32) + transpose_99 = paddle._C_ops.transpose(slice_154, [0, 1, 3, 2]) + del slice_154 + + # pd_op.matmul: (-1x16x49x49xf32) <- (-1x16x49x32xf32, -1x16x32x49xf32) + matmul_83 = paddle._C_ops.matmul(scale_16, transpose_99, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_144 = paddle._C_ops.reshape(data_33, full_int_array_12) + del data_33 + + # pd_op.index_select: (2401x16xf32) <- (169x16xf32, 2401xi64) + index_select_16 = paddle._C_ops.index_select(data_34, reshape_144, 0) + del data_34 + + # pd_op.reshape: (49x49x16xf32) <- (2401x16xf32, 3xi64) + reshape_311 = paddle._C_ops.reshape(index_select_16, full_int_array_13) + + # pd_op.transpose: (16x49x49xf32) <- (49x49x16xf32) + transpose_100 = paddle._C_ops.transpose(reshape_311, [2, 0, 1]) + del reshape_311 + + # pd_op.unsqueeze: (1x16x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_24 = paddle._C_ops.unsqueeze(transpose_100, full_int_array_7) + + # pd_op.add: (-1x16x49x49xf32) <- (-1x16x49x49xf32, 1x16x49x49xf32) + add_208 = paddle._C_ops.add(matmul_83, unsqueeze_24) + + # pd_op.softmax: (-1x16x49x49xf32) <- (-1x16x49x49xf32) + softmax_16 = paddle._C_ops.softmax(add_208, -1) + del add_208 + + # pd_op.matmul: (-1x16x49x32xf32) <- (-1x16x49x49xf32, -1x16x49x32xf32) + matmul_140 = paddle._C_ops.matmul(softmax_16, slice_16, False, False) + + # pd_op.transpose: (-1x49x16x32xf32) <- (-1x16x49x32xf32) + transpose_101 = paddle._C_ops.transpose(matmul_140, [0, 2, 1, 3]) + del matmul_140 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_137 = [slice_152, full_29, full_44] + del slice_152 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_135 = paddle._C_ops.stack(combine_137, 0) + del combine_137 + + # pd_op.reshape: (-1x49x512xf32) <- (-1x49x16x32xf32, 3xi64) + reshape_145 = paddle._C_ops.reshape(transpose_101, stack_135) + del stack_135 + + # pd_op.matmul: (-1x49x512xf32) <- (-1x49x512xf32, 512x512xf32) + matmul_84 = paddle._C_ops.matmul(reshape_145, parameter_98, False, False) + del parameter_98 + + # pd_op.add: (-1x49x512xf32) <- (-1x49x512xf32, 512xf32) + add_114 = paddle._C_ops.add(matmul_84, parameter_97) + del parameter_97 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x49x512xf32, 4xi64) + reshape_146 = paddle._C_ops.reshape(add_114, full_int_array_38) + + # pd_op.reshape: (-1x2x2x7x7x512xf32) <- (-1x7x7x512xf32, 6xi64) + reshape_312 = paddle._C_ops.reshape(reshape_146, full_int_array_40) + + # pd_op.transpose: (-1x2x7x2x7x512xf32) <- (-1x2x2x7x7x512xf32) + transpose_102 = paddle._C_ops.transpose(reshape_312, [0, 1, 3, 2, 4, 5]) + del reshape_312 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x2x7x2x7x512xf32, 4xi64) + reshape_147 = paddle._C_ops.reshape(transpose_102, full_int_array_41) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_138 = [slice_150, full_56, full_44] + del slice_150 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_136 = paddle._C_ops.stack(combine_138, 0) + del combine_138 + + # pd_op.reshape: (-1x196x512xf32) <- (-1x14x14x512xf32, 3xi64) + reshape_148 = paddle._C_ops.reshape(reshape_147, stack_136) + del stack_136 + + # pd_op.full: (xf32) <- () + full_17 = paddle._C_ops.full( + [], + float("0.652174"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_184 = full_17 + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_97 = paddle._C_ops.shape64(reshape_148) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_155 = paddle._C_ops.slice( + shape64_97, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_97 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_139 = [slice_155, full_40, full_40] + del slice_155 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_137 = paddle._C_ops.stack(combine_139, 0) + del combine_139 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_30 = paddle._C_ops.uniform( + stack_137, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_137 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_209 = paddle._C_ops.add(full_17, uniform_30) + del uniform_30 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_30 = paddle._C_ops.floor(add_209) + del add_209 + + # pd_op.divide: (-1x196x512xf32) <- (-1x196x512xf32, xf32) + divide_30 = paddle._C_ops.divide(reshape_148, full_17) + + # pd_op.multiply: (-1x196x512xf32) <- (-1x196x512xf32, -1x1x1xf32) + multiply_30 = paddle._C_ops.multiply(divide_30, floor_30) + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_115 = paddle._C_ops.add(add_112, multiply_30) + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_108, layer_norm_109, layer_norm_110 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_115, parameter_96, parameter_95, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_95, parameter_96 + + # pd_op.matmul: (-1x196x2048xf32) <- (-1x196x512xf32, 512x2048xf32) + matmul_85 = paddle._C_ops.matmul(layer_norm_108, parameter_94, False, False) + del parameter_94 + + # pd_op.add: (-1x196x2048xf32) <- (-1x196x2048xf32, 2048xf32) + add_116 = paddle._C_ops.add(matmul_85, parameter_93) + del parameter_93 + + # pd_op.gelu: (-1x196x2048xf32) <- (-1x196x2048xf32) + gelu_16 = paddle._C_ops.gelu(add_116, False) + + # pd_op.matmul: (-1x196x512xf32) <- (-1x196x2048xf32, 2048x512xf32) + matmul_86 = paddle._C_ops.matmul(gelu_16, parameter_92, False, False) + del parameter_92 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, 512xf32) + add_117 = paddle._C_ops.add(matmul_86, parameter_91) + del parameter_91 + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_98 = paddle._C_ops.shape64(add_117) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_156 = paddle._C_ops.slice( + shape64_98, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_98 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_140 = [slice_156, full_40, full_40] + del slice_156 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_138 = paddle._C_ops.stack(combine_140, 0) + del combine_140 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_31 = paddle._C_ops.uniform( + stack_138, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_138 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_210 = paddle._C_ops.add(full_17, uniform_31) + del uniform_31 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_31 = paddle._C_ops.floor(add_210) + del add_210 + + # pd_op.divide: (-1x196x512xf32) <- (-1x196x512xf32, xf32) + divide_31 = paddle._C_ops.divide(add_117, full_17) + + # pd_op.multiply: (-1x196x512xf32) <- (-1x196x512xf32, -1x1x1xf32) + multiply_31 = paddle._C_ops.multiply(divide_31, floor_31) + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_118 = paddle._C_ops.add(add_115, multiply_31) + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_99 = paddle._C_ops.shape64(add_118) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_157 = paddle._C_ops.slice( + shape64_99, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_99 + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_111, layer_norm_112, layer_norm_113 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_118, parameter_90, parameter_89, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_89, parameter_90 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_141 = [slice_157, full_54, full_54, full_44] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_139 = paddle._C_ops.stack(combine_141, 0) + del combine_141 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x196x512xf32, 4xi64) + reshape_149 = paddle._C_ops.reshape(layer_norm_111, stack_139) + del stack_139 + + # pd_op.shape64: (4xi64) <- (-1x14x14x512xf32) + shape64_100 = paddle._C_ops.shape64(reshape_149) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_158 = paddle._C_ops.slice( + shape64_100, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_100 + + # pd_op.roll: (-1x14x14x512xf32) <- (-1x14x14x512xf32, 2xi64) + roll_16 = paddle._C_ops.roll(reshape_149, full_int_array_2, [1, 2]) + + # pd_op.shape64: (4xi64) <- (-1x14x14x512xf32) + shape64_101 = paddle._C_ops.shape64(roll_16) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_159 = paddle._C_ops.slice( + shape64_101, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_101 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_142 = [slice_159, full_55, full_28, full_55, full_28, full_44] + del slice_159 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_140 = paddle._C_ops.stack(combine_142, 0) + del combine_142 + + # pd_op.reshape: (-1x2x7x2x7x512xf32) <- (-1x14x14x512xf32, 6xi64) + reshape_313 = paddle._C_ops.reshape(roll_16, stack_140) + del stack_140 + + # pd_op.transpose: (-1x2x2x7x7x512xf32) <- (-1x2x7x2x7x512xf32) + transpose_103 = paddle._C_ops.transpose(reshape_313, [0, 1, 3, 2, 4, 5]) + del reshape_313 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x2x2x7x7x512xf32, 4xi64) + reshape_150 = paddle._C_ops.reshape(transpose_103, full_int_array_38) + + # pd_op.reshape: (-1x49x512xf32) <- (-1x7x7x512xf32, 3xi64) + reshape_151 = paddle._C_ops.reshape(reshape_150, full_int_array_39) + + # pd_op.full: (1x14x14x1xf32) <- () + full_66 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__76 = paddle._C_ops.set_value_( + full_66, + full_int_array_16, + full_int_array_17, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_66 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__77 = paddle._C_ops.set_value_( + set_value__76, + full_int_array_19, + full_int_array_20, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__76 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__78 = paddle._C_ops.set_value_( + set_value__77, + full_int_array_21, + full_int_array_22, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__77 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__79 = paddle._C_ops.set_value_( + set_value__78, + full_int_array_23, + full_int_array_24, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__78 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__80 = paddle._C_ops.set_value_( + set_value__79, + full_int_array_17, + full_int_array_2, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__79 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__81 = paddle._C_ops.set_value_( + set_value__80, + full_int_array_20, + full_int_array_25, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__80 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__82 = paddle._C_ops.set_value_( + set_value__81, + full_int_array_26, + full_int_array_27, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__81 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__83 = paddle._C_ops.set_value_( + set_value__82, + full_int_array_24, + full_int_array_28, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__82 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__8 = paddle._C_ops.set_value_( + set_value__83, + full_int_array_2, + full_int_array_29, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__83 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_314 = paddle._C_ops.reshape(set_value__8, full_int_array_42) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_154 = paddle._C_ops.transpose(reshape_314, [0, 1, 3, 2, 4, 5]) + del reshape_314 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_315 = paddle._C_ops.reshape(transpose_154, full_int_array_31) + del transpose_154 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_316 = paddle._C_ops.reshape(reshape_315, full_int_array_32) + del reshape_315 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_61 = paddle._C_ops.unsqueeze(reshape_316, full_int_array_8) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_62 = paddle._C_ops.unsqueeze(reshape_316, full_int_array_0) + del reshape_316 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_8 = paddle._C_ops.subtract(unsqueeze_61, unsqueeze_62) + del unsqueeze_61, unsqueeze_62 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_8 = paddle._C_ops.not_equal(subtract_8, full_35) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_16 = paddle._C_ops.where(not_equal_8, full_58, subtract_8) + del not_equal_8, subtract_8 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_8 = paddle._C_ops.equal(where_16, full_35) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_17 = paddle._C_ops.where(equal_8, full_59, where_16) + del equal_8, where_16 + + # pd_op.shape64: (3xi64) <- (-1x49x512xf32) + shape64_102 = paddle._C_ops.shape64(reshape_151) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_160 = paddle._C_ops.slice( + shape64_102, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_102 + + # pd_op.matmul: (-1x49x1536xf32) <- (-1x49x512xf32, 512x1536xf32) + matmul_87 = paddle._C_ops.matmul(reshape_151, parameter_88, False, False) + del parameter_88 + + # pd_op.add: (-1x49x1536xf32) <- (-1x49x1536xf32, 1536xf32) + add_119 = paddle._C_ops.add(matmul_87, parameter_87) + del parameter_87 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_143 = [slice_160, full_29, full_30, full_52, full_32] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_141 = paddle._C_ops.stack(combine_143, 0) + del combine_143 + + # pd_op.reshape: (-1x49x3x16x32xf32) <- (-1x49x1536xf32, 5xi64) + reshape_317 = paddle._C_ops.reshape(add_119, stack_141) + del stack_141 + + # pd_op.transpose: (3x-1x16x49x32xf32) <- (-1x49x3x16x32xf32) + transpose_104 = paddle._C_ops.transpose(reshape_317, [2, 0, 3, 1, 4]) + del reshape_317 + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_161 = paddle._C_ops.slice( + transpose_104, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_162 = paddle._C_ops.slice( + transpose_104, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_17 = paddle._C_ops.slice( + transpose_104, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x16x49x32xf32) <- (-1x16x49x32xf32, 1xf32) + scale_17 = paddle._C_ops.scale(slice_161, full_0, float("0"), True) + del slice_161 + + # pd_op.transpose: (-1x16x32x49xf32) <- (-1x16x49x32xf32) + transpose_105 = paddle._C_ops.transpose(slice_162, [0, 1, 3, 2]) + del slice_162 + + # pd_op.matmul: (-1x16x49x49xf32) <- (-1x16x49x32xf32, -1x16x32x49xf32) + matmul_88 = paddle._C_ops.matmul(scale_17, transpose_105, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_152 = paddle._C_ops.reshape(data_35, full_int_array_12) + del data_35 + + # pd_op.index_select: (2401x16xf32) <- (169x16xf32, 2401xi64) + index_select_17 = paddle._C_ops.index_select(data_36, reshape_152, 0) + del data_36 + + # pd_op.reshape: (49x49x16xf32) <- (2401x16xf32, 3xi64) + reshape_318 = paddle._C_ops.reshape(index_select_17, full_int_array_13) + + # pd_op.transpose: (16x49x49xf32) <- (49x49x16xf32) + transpose_106 = paddle._C_ops.transpose(reshape_318, [2, 0, 1]) + del reshape_318 + + # pd_op.unsqueeze: (1x16x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_25 = paddle._C_ops.unsqueeze(transpose_106, full_int_array_7) + + # pd_op.add: (-1x16x49x49xf32) <- (-1x16x49x49xf32, 1x16x49x49xf32) + add_120 = paddle._C_ops.add(matmul_88, unsqueeze_25) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_8 = paddle._C_ops.floor_divide(slice_160, full_60) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_144 = [floor_divide_8, full_31, full_52, full_29, full_29] + del floor_divide_8 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_142 = paddle._C_ops.stack(combine_144, 0) + del combine_144 + + # pd_op.reshape: (-1x4x16x49x49xf32) <- (-1x16x49x49xf32, 5xi64) + reshape_153 = paddle._C_ops.reshape(add_120, stack_142) + del stack_142 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_63 = paddle._C_ops.unsqueeze(where_17, full_int_array_8) + del where_17 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_26 = paddle._C_ops.unsqueeze(unsqueeze_63, full_int_array_7) + del unsqueeze_63 + + # pd_op.add: (-1x4x16x49x49xf32) <- (-1x4x16x49x49xf32, 1x4x1x49x49xf32) + add_121 = paddle._C_ops.add(reshape_153, unsqueeze_26) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_145 = [slice_160, full_52, full_29, full_29] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_143 = paddle._C_ops.stack(combine_145, 0) + del combine_145 + + # pd_op.reshape: (-1x16x49x49xf32) <- (-1x4x16x49x49xf32, 4xi64) + reshape_319 = paddle._C_ops.reshape(add_121, stack_143) + del stack_143 + + # pd_op.softmax: (-1x16x49x49xf32) <- (-1x16x49x49xf32) + softmax_17 = paddle._C_ops.softmax(reshape_319, -1) + del reshape_319 + + # pd_op.matmul: (-1x16x49x32xf32) <- (-1x16x49x49xf32, -1x16x49x32xf32) + matmul_141 = paddle._C_ops.matmul(softmax_17, slice_17, False, False) + + # pd_op.transpose: (-1x49x16x32xf32) <- (-1x16x49x32xf32) + transpose_107 = paddle._C_ops.transpose(matmul_141, [0, 2, 1, 3]) + del matmul_141 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_146 = [slice_160, full_29, full_44] + del slice_160 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_144 = paddle._C_ops.stack(combine_146, 0) + del combine_146 + + # pd_op.reshape: (-1x49x512xf32) <- (-1x49x16x32xf32, 3xi64) + reshape_154 = paddle._C_ops.reshape(transpose_107, stack_144) + del stack_144 + + # pd_op.matmul: (-1x49x512xf32) <- (-1x49x512xf32, 512x512xf32) + matmul_89 = paddle._C_ops.matmul(reshape_154, parameter_86, False, False) + del parameter_86 + + # pd_op.add: (-1x49x512xf32) <- (-1x49x512xf32, 512xf32) + add_122 = paddle._C_ops.add(matmul_89, parameter_85) + del parameter_85 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x49x512xf32, 4xi64) + reshape_155 = paddle._C_ops.reshape(add_122, full_int_array_38) + + # pd_op.reshape: (-1x2x2x7x7x512xf32) <- (-1x7x7x512xf32, 6xi64) + reshape_320 = paddle._C_ops.reshape(reshape_155, full_int_array_40) + + # pd_op.transpose: (-1x2x7x2x7x512xf32) <- (-1x2x2x7x7x512xf32) + transpose_108 = paddle._C_ops.transpose(reshape_320, [0, 1, 3, 2, 4, 5]) + del reshape_320 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x2x7x2x7x512xf32, 4xi64) + reshape_156 = paddle._C_ops.reshape(transpose_108, full_int_array_41) + + # pd_op.roll: (-1x14x14x512xf32) <- (-1x14x14x512xf32, 2xi64) + roll_17 = paddle._C_ops.roll(reshape_156, full_int_array_3, [1, 2]) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_147 = [slice_157, full_56, full_44] + del slice_157 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_145 = paddle._C_ops.stack(combine_147, 0) + del combine_147 + + # pd_op.reshape: (-1x196x512xf32) <- (-1x14x14x512xf32, 3xi64) + reshape_157 = paddle._C_ops.reshape(roll_17, stack_145) + del stack_145 + + # pd_op.full: (xf32) <- () + full_18 = paddle._C_ops.full( + [], + float("0.630435"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_195 = full_18 + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_103 = paddle._C_ops.shape64(reshape_157) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_163 = paddle._C_ops.slice( + shape64_103, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_103 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_148 = [slice_163, full_40, full_40] + del slice_163 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_146 = paddle._C_ops.stack(combine_148, 0) + del combine_148 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_32 = paddle._C_ops.uniform( + stack_146, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_146 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_211 = paddle._C_ops.add(full_18, uniform_32) + del uniform_32 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_32 = paddle._C_ops.floor(add_211) + del add_211 + + # pd_op.divide: (-1x196x512xf32) <- (-1x196x512xf32, xf32) + divide_32 = paddle._C_ops.divide(reshape_157, full_18) + + # pd_op.multiply: (-1x196x512xf32) <- (-1x196x512xf32, -1x1x1xf32) + multiply_32 = paddle._C_ops.multiply(divide_32, floor_32) + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_123 = paddle._C_ops.add(add_118, multiply_32) + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_114, layer_norm_115, layer_norm_116 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_123, parameter_84, parameter_83, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_83, parameter_84 + + # pd_op.matmul: (-1x196x2048xf32) <- (-1x196x512xf32, 512x2048xf32) + matmul_90 = paddle._C_ops.matmul(layer_norm_114, parameter_82, False, False) + del parameter_82 + + # pd_op.add: (-1x196x2048xf32) <- (-1x196x2048xf32, 2048xf32) + add_124 = paddle._C_ops.add(matmul_90, parameter_81) + del parameter_81 + + # pd_op.gelu: (-1x196x2048xf32) <- (-1x196x2048xf32) + gelu_17 = paddle._C_ops.gelu(add_124, False) + + # pd_op.matmul: (-1x196x512xf32) <- (-1x196x2048xf32, 2048x512xf32) + matmul_91 = paddle._C_ops.matmul(gelu_17, parameter_80, False, False) + del parameter_80 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, 512xf32) + add_125 = paddle._C_ops.add(matmul_91, parameter_79) + del parameter_79 + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_104 = paddle._C_ops.shape64(add_125) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_164 = paddle._C_ops.slice( + shape64_104, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_104 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_149 = [slice_164, full_40, full_40] + del slice_164 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_147 = paddle._C_ops.stack(combine_149, 0) + del combine_149 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_33 = paddle._C_ops.uniform( + stack_147, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_147 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_212 = paddle._C_ops.add(full_18, uniform_33) + del uniform_33 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_33 = paddle._C_ops.floor(add_212) + del add_212 + + # pd_op.divide: (-1x196x512xf32) <- (-1x196x512xf32, xf32) + divide_33 = paddle._C_ops.divide(add_125, full_18) + + # pd_op.multiply: (-1x196x512xf32) <- (-1x196x512xf32, -1x1x1xf32) + multiply_33 = paddle._C_ops.multiply(divide_33, floor_33) + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_126 = paddle._C_ops.add(add_123, multiply_33) + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_105 = paddle._C_ops.shape64(add_126) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_165 = paddle._C_ops.slice( + shape64_105, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_105 + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_117, layer_norm_118, layer_norm_119 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_126, parameter_78, parameter_77, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_77, parameter_78 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_150 = [slice_165, full_54, full_54, full_44] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_148 = paddle._C_ops.stack(combine_150, 0) + del combine_150 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x196x512xf32, 4xi64) + reshape_158 = paddle._C_ops.reshape(layer_norm_117, stack_148) + del stack_148 + + # pd_op.shape64: (4xi64) <- (-1x14x14x512xf32) + shape64_106 = paddle._C_ops.shape64(reshape_158) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_166 = paddle._C_ops.slice( + shape64_106, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_106 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_151 = [slice_166, full_55, full_28, full_55, full_28, full_44] + del slice_166 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_149 = paddle._C_ops.stack(combine_151, 0) + del combine_151 + + # pd_op.reshape: (-1x2x7x2x7x512xf32) <- (-1x14x14x512xf32, 6xi64) + reshape_321 = paddle._C_ops.reshape(reshape_158, stack_149) + del stack_149 + + # pd_op.transpose: (-1x2x2x7x7x512xf32) <- (-1x2x7x2x7x512xf32) + transpose_109 = paddle._C_ops.transpose(reshape_321, [0, 1, 3, 2, 4, 5]) + del reshape_321 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x2x2x7x7x512xf32, 4xi64) + reshape_159 = paddle._C_ops.reshape(transpose_109, full_int_array_38) + + # pd_op.reshape: (-1x49x512xf32) <- (-1x7x7x512xf32, 3xi64) + reshape_160 = paddle._C_ops.reshape(reshape_159, full_int_array_39) + + # pd_op.shape64: (3xi64) <- (-1x49x512xf32) + shape64_107 = paddle._C_ops.shape64(reshape_160) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_167 = paddle._C_ops.slice( + shape64_107, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_107 + + # pd_op.matmul: (-1x49x1536xf32) <- (-1x49x512xf32, 512x1536xf32) + matmul_92 = paddle._C_ops.matmul(reshape_160, parameter_76, False, False) + del parameter_76 + + # pd_op.add: (-1x49x1536xf32) <- (-1x49x1536xf32, 1536xf32) + add_127 = paddle._C_ops.add(matmul_92, parameter_75) + del parameter_75 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_152 = [slice_167, full_29, full_30, full_52, full_32] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_150 = paddle._C_ops.stack(combine_152, 0) + del combine_152 + + # pd_op.reshape: (-1x49x3x16x32xf32) <- (-1x49x1536xf32, 5xi64) + reshape_322 = paddle._C_ops.reshape(add_127, stack_150) + del stack_150 + + # pd_op.transpose: (3x-1x16x49x32xf32) <- (-1x49x3x16x32xf32) + transpose_110 = paddle._C_ops.transpose(reshape_322, [2, 0, 3, 1, 4]) + del reshape_322 + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_168 = paddle._C_ops.slice( + transpose_110, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_169 = paddle._C_ops.slice( + transpose_110, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_18 = paddle._C_ops.slice( + transpose_110, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x16x49x32xf32) <- (-1x16x49x32xf32, 1xf32) + scale_18 = paddle._C_ops.scale(slice_168, full_0, float("0"), True) + del slice_168 + + # pd_op.transpose: (-1x16x32x49xf32) <- (-1x16x49x32xf32) + transpose_111 = paddle._C_ops.transpose(slice_169, [0, 1, 3, 2]) + del slice_169 + + # pd_op.matmul: (-1x16x49x49xf32) <- (-1x16x49x32xf32, -1x16x32x49xf32) + matmul_93 = paddle._C_ops.matmul(scale_18, transpose_111, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_161 = paddle._C_ops.reshape(data_37, full_int_array_12) + del data_37 + + # pd_op.index_select: (2401x16xf32) <- (169x16xf32, 2401xi64) + index_select_18 = paddle._C_ops.index_select(data_38, reshape_161, 0) + del data_38 + + # pd_op.reshape: (49x49x16xf32) <- (2401x16xf32, 3xi64) + reshape_323 = paddle._C_ops.reshape(index_select_18, full_int_array_13) + + # pd_op.transpose: (16x49x49xf32) <- (49x49x16xf32) + transpose_112 = paddle._C_ops.transpose(reshape_323, [2, 0, 1]) + del reshape_323 + + # pd_op.unsqueeze: (1x16x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_27 = paddle._C_ops.unsqueeze(transpose_112, full_int_array_7) + + # pd_op.add: (-1x16x49x49xf32) <- (-1x16x49x49xf32, 1x16x49x49xf32) + add_213 = paddle._C_ops.add(matmul_93, unsqueeze_27) + + # pd_op.softmax: (-1x16x49x49xf32) <- (-1x16x49x49xf32) + softmax_18 = paddle._C_ops.softmax(add_213, -1) + del add_213 + + # pd_op.matmul: (-1x16x49x32xf32) <- (-1x16x49x49xf32, -1x16x49x32xf32) + matmul_142 = paddle._C_ops.matmul(softmax_18, slice_18, False, False) + + # pd_op.transpose: (-1x49x16x32xf32) <- (-1x16x49x32xf32) + transpose_113 = paddle._C_ops.transpose(matmul_142, [0, 2, 1, 3]) + del matmul_142 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_153 = [slice_167, full_29, full_44] + del slice_167 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_151 = paddle._C_ops.stack(combine_153, 0) + del combine_153 + + # pd_op.reshape: (-1x49x512xf32) <- (-1x49x16x32xf32, 3xi64) + reshape_162 = paddle._C_ops.reshape(transpose_113, stack_151) + del stack_151 + + # pd_op.matmul: (-1x49x512xf32) <- (-1x49x512xf32, 512x512xf32) + matmul_94 = paddle._C_ops.matmul(reshape_162, parameter_74, False, False) + del parameter_74 + + # pd_op.add: (-1x49x512xf32) <- (-1x49x512xf32, 512xf32) + add_128 = paddle._C_ops.add(matmul_94, parameter_73) + del parameter_73 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x49x512xf32, 4xi64) + reshape_163 = paddle._C_ops.reshape(add_128, full_int_array_38) + + # pd_op.reshape: (-1x2x2x7x7x512xf32) <- (-1x7x7x512xf32, 6xi64) + reshape_324 = paddle._C_ops.reshape(reshape_163, full_int_array_40) + + # pd_op.transpose: (-1x2x7x2x7x512xf32) <- (-1x2x2x7x7x512xf32) + transpose_114 = paddle._C_ops.transpose(reshape_324, [0, 1, 3, 2, 4, 5]) + del reshape_324 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x2x7x2x7x512xf32, 4xi64) + reshape_164 = paddle._C_ops.reshape(transpose_114, full_int_array_41) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_154 = [slice_165, full_56, full_44] + del slice_165 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_152 = paddle._C_ops.stack(combine_154, 0) + del combine_154 + + # pd_op.reshape: (-1x196x512xf32) <- (-1x14x14x512xf32, 3xi64) + reshape_165 = paddle._C_ops.reshape(reshape_164, stack_152) + del stack_152 + + # pd_op.full: (xf32) <- () + full_19 = paddle._C_ops.full( + [], + float("0.608696"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_204 = full_19 + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_108 = paddle._C_ops.shape64(reshape_165) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_170 = paddle._C_ops.slice( + shape64_108, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_108 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_155 = [slice_170, full_40, full_40] + del slice_170 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_153 = paddle._C_ops.stack(combine_155, 0) + del combine_155 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_34 = paddle._C_ops.uniform( + stack_153, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_153 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_214 = paddle._C_ops.add(full_19, uniform_34) + del uniform_34 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_34 = paddle._C_ops.floor(add_214) + del add_214 + + # pd_op.divide: (-1x196x512xf32) <- (-1x196x512xf32, xf32) + divide_34 = paddle._C_ops.divide(reshape_165, full_19) + + # pd_op.multiply: (-1x196x512xf32) <- (-1x196x512xf32, -1x1x1xf32) + multiply_34 = paddle._C_ops.multiply(divide_34, floor_34) + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_129 = paddle._C_ops.add(add_126, multiply_34) + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_120, layer_norm_121, layer_norm_122 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_129, parameter_72, parameter_71, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_71, parameter_72 + + # pd_op.matmul: (-1x196x2048xf32) <- (-1x196x512xf32, 512x2048xf32) + matmul_95 = paddle._C_ops.matmul(layer_norm_120, parameter_70, False, False) + del parameter_70 + + # pd_op.add: (-1x196x2048xf32) <- (-1x196x2048xf32, 2048xf32) + add_130 = paddle._C_ops.add(matmul_95, parameter_69) + del parameter_69 + + # pd_op.gelu: (-1x196x2048xf32) <- (-1x196x2048xf32) + gelu_18 = paddle._C_ops.gelu(add_130, False) + + # pd_op.matmul: (-1x196x512xf32) <- (-1x196x2048xf32, 2048x512xf32) + matmul_96 = paddle._C_ops.matmul(gelu_18, parameter_68, False, False) + del parameter_68 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, 512xf32) + add_131 = paddle._C_ops.add(matmul_96, parameter_67) + del parameter_67 + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_109 = paddle._C_ops.shape64(add_131) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_171 = paddle._C_ops.slice( + shape64_109, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_109 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_156 = [slice_171, full_40, full_40] + del slice_171 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_154 = paddle._C_ops.stack(combine_156, 0) + del combine_156 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_35 = paddle._C_ops.uniform( + stack_154, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_154 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_215 = paddle._C_ops.add(full_19, uniform_35) + del uniform_35 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_35 = paddle._C_ops.floor(add_215) + del add_215 + + # pd_op.divide: (-1x196x512xf32) <- (-1x196x512xf32, xf32) + divide_35 = paddle._C_ops.divide(add_131, full_19) + + # pd_op.multiply: (-1x196x512xf32) <- (-1x196x512xf32, -1x1x1xf32) + multiply_35 = paddle._C_ops.multiply(divide_35, floor_35) + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_132 = paddle._C_ops.add(add_129, multiply_35) + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_110 = paddle._C_ops.shape64(add_132) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_172 = paddle._C_ops.slice( + shape64_110, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_110 + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_123, layer_norm_124, layer_norm_125 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_132, parameter_66, parameter_65, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_65, parameter_66 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_157 = [slice_172, full_54, full_54, full_44] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_155 = paddle._C_ops.stack(combine_157, 0) + del combine_157 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x196x512xf32, 4xi64) + reshape_166 = paddle._C_ops.reshape(layer_norm_123, stack_155) + del stack_155 + + # pd_op.shape64: (4xi64) <- (-1x14x14x512xf32) + shape64_111 = paddle._C_ops.shape64(reshape_166) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_173 = paddle._C_ops.slice( + shape64_111, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_111 + + # pd_op.roll: (-1x14x14x512xf32) <- (-1x14x14x512xf32, 2xi64) + roll_18 = paddle._C_ops.roll(reshape_166, full_int_array_2, [1, 2]) + + # pd_op.shape64: (4xi64) <- (-1x14x14x512xf32) + shape64_112 = paddle._C_ops.shape64(roll_18) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_174 = paddle._C_ops.slice( + shape64_112, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_112 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_158 = [slice_174, full_55, full_28, full_55, full_28, full_44] + del slice_174 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_156 = paddle._C_ops.stack(combine_158, 0) + del combine_158 + + # pd_op.reshape: (-1x2x7x2x7x512xf32) <- (-1x14x14x512xf32, 6xi64) + reshape_325 = paddle._C_ops.reshape(roll_18, stack_156) + del stack_156 + + # pd_op.transpose: (-1x2x2x7x7x512xf32) <- (-1x2x7x2x7x512xf32) + transpose_115 = paddle._C_ops.transpose(reshape_325, [0, 1, 3, 2, 4, 5]) + del reshape_325 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x2x2x7x7x512xf32, 4xi64) + reshape_167 = paddle._C_ops.reshape(transpose_115, full_int_array_38) + + # pd_op.reshape: (-1x49x512xf32) <- (-1x7x7x512xf32, 3xi64) + reshape_168 = paddle._C_ops.reshape(reshape_167, full_int_array_39) + + # pd_op.full: (1x14x14x1xf32) <- () + full_67 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__84 = paddle._C_ops.set_value_( + full_67, + full_int_array_16, + full_int_array_17, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_67 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__85 = paddle._C_ops.set_value_( + set_value__84, + full_int_array_19, + full_int_array_20, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__84 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__86 = paddle._C_ops.set_value_( + set_value__85, + full_int_array_21, + full_int_array_22, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__85 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__87 = paddle._C_ops.set_value_( + set_value__86, + full_int_array_23, + full_int_array_24, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__86 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__88 = paddle._C_ops.set_value_( + set_value__87, + full_int_array_17, + full_int_array_2, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__87 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__89 = paddle._C_ops.set_value_( + set_value__88, + full_int_array_20, + full_int_array_25, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__88 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__90 = paddle._C_ops.set_value_( + set_value__89, + full_int_array_26, + full_int_array_27, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__89 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__91 = paddle._C_ops.set_value_( + set_value__90, + full_int_array_24, + full_int_array_28, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__90 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__9 = paddle._C_ops.set_value_( + set_value__91, + full_int_array_2, + full_int_array_29, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__91 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_326 = paddle._C_ops.reshape(set_value__9, full_int_array_42) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_155 = paddle._C_ops.transpose(reshape_326, [0, 1, 3, 2, 4, 5]) + del reshape_326 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_327 = paddle._C_ops.reshape(transpose_155, full_int_array_31) + del transpose_155 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_328 = paddle._C_ops.reshape(reshape_327, full_int_array_32) + del reshape_327 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_64 = paddle._C_ops.unsqueeze(reshape_328, full_int_array_8) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_65 = paddle._C_ops.unsqueeze(reshape_328, full_int_array_0) + del reshape_328 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_9 = paddle._C_ops.subtract(unsqueeze_64, unsqueeze_65) + del unsqueeze_64, unsqueeze_65 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_9 = paddle._C_ops.not_equal(subtract_9, full_35) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_18 = paddle._C_ops.where(not_equal_9, full_58, subtract_9) + del not_equal_9, subtract_9 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_9 = paddle._C_ops.equal(where_18, full_35) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_19 = paddle._C_ops.where(equal_9, full_59, where_18) + del equal_9, where_18 + + # pd_op.shape64: (3xi64) <- (-1x49x512xf32) + shape64_113 = paddle._C_ops.shape64(reshape_168) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_175 = paddle._C_ops.slice( + shape64_113, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_113 + + # pd_op.matmul: (-1x49x1536xf32) <- (-1x49x512xf32, 512x1536xf32) + matmul_97 = paddle._C_ops.matmul(reshape_168, parameter_64, False, False) + del parameter_64 + + # pd_op.add: (-1x49x1536xf32) <- (-1x49x1536xf32, 1536xf32) + add_133 = paddle._C_ops.add(matmul_97, parameter_63) + del parameter_63 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_159 = [slice_175, full_29, full_30, full_52, full_32] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_157 = paddle._C_ops.stack(combine_159, 0) + del combine_159 + + # pd_op.reshape: (-1x49x3x16x32xf32) <- (-1x49x1536xf32, 5xi64) + reshape_329 = paddle._C_ops.reshape(add_133, stack_157) + del stack_157 + + # pd_op.transpose: (3x-1x16x49x32xf32) <- (-1x49x3x16x32xf32) + transpose_116 = paddle._C_ops.transpose(reshape_329, [2, 0, 3, 1, 4]) + del reshape_329 + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_176 = paddle._C_ops.slice( + transpose_116, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_177 = paddle._C_ops.slice( + transpose_116, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_19 = paddle._C_ops.slice( + transpose_116, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x16x49x32xf32) <- (-1x16x49x32xf32, 1xf32) + scale_19 = paddle._C_ops.scale(slice_176, full_0, float("0"), True) + del slice_176 + + # pd_op.transpose: (-1x16x32x49xf32) <- (-1x16x49x32xf32) + transpose_117 = paddle._C_ops.transpose(slice_177, [0, 1, 3, 2]) + del slice_177 + + # pd_op.matmul: (-1x16x49x49xf32) <- (-1x16x49x32xf32, -1x16x32x49xf32) + matmul_98 = paddle._C_ops.matmul(scale_19, transpose_117, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_169 = paddle._C_ops.reshape(data_39, full_int_array_12) + del data_39 + + # pd_op.index_select: (2401x16xf32) <- (169x16xf32, 2401xi64) + index_select_19 = paddle._C_ops.index_select(data_40, reshape_169, 0) + del data_40 + + # pd_op.reshape: (49x49x16xf32) <- (2401x16xf32, 3xi64) + reshape_330 = paddle._C_ops.reshape(index_select_19, full_int_array_13) + + # pd_op.transpose: (16x49x49xf32) <- (49x49x16xf32) + transpose_118 = paddle._C_ops.transpose(reshape_330, [2, 0, 1]) + del reshape_330 + + # pd_op.unsqueeze: (1x16x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_28 = paddle._C_ops.unsqueeze(transpose_118, full_int_array_7) + + # pd_op.add: (-1x16x49x49xf32) <- (-1x16x49x49xf32, 1x16x49x49xf32) + add_134 = paddle._C_ops.add(matmul_98, unsqueeze_28) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_9 = paddle._C_ops.floor_divide(slice_175, full_60) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_160 = [floor_divide_9, full_31, full_52, full_29, full_29] + del floor_divide_9 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_158 = paddle._C_ops.stack(combine_160, 0) + del combine_160 + + # pd_op.reshape: (-1x4x16x49x49xf32) <- (-1x16x49x49xf32, 5xi64) + reshape_170 = paddle._C_ops.reshape(add_134, stack_158) + del stack_158 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_66 = paddle._C_ops.unsqueeze(where_19, full_int_array_8) + del where_19 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_29 = paddle._C_ops.unsqueeze(unsqueeze_66, full_int_array_7) + del unsqueeze_66 + + # pd_op.add: (-1x4x16x49x49xf32) <- (-1x4x16x49x49xf32, 1x4x1x49x49xf32) + add_135 = paddle._C_ops.add(reshape_170, unsqueeze_29) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_161 = [slice_175, full_52, full_29, full_29] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_159 = paddle._C_ops.stack(combine_161, 0) + del combine_161 + + # pd_op.reshape: (-1x16x49x49xf32) <- (-1x4x16x49x49xf32, 4xi64) + reshape_331 = paddle._C_ops.reshape(add_135, stack_159) + del stack_159 + + # pd_op.softmax: (-1x16x49x49xf32) <- (-1x16x49x49xf32) + softmax_19 = paddle._C_ops.softmax(reshape_331, -1) + del reshape_331 + + # pd_op.matmul: (-1x16x49x32xf32) <- (-1x16x49x49xf32, -1x16x49x32xf32) + matmul_143 = paddle._C_ops.matmul(softmax_19, slice_19, False, False) + + # pd_op.transpose: (-1x49x16x32xf32) <- (-1x16x49x32xf32) + transpose_119 = paddle._C_ops.transpose(matmul_143, [0, 2, 1, 3]) + del matmul_143 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_162 = [slice_175, full_29, full_44] + del slice_175 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_160 = paddle._C_ops.stack(combine_162, 0) + del combine_162 + + # pd_op.reshape: (-1x49x512xf32) <- (-1x49x16x32xf32, 3xi64) + reshape_171 = paddle._C_ops.reshape(transpose_119, stack_160) + del stack_160 + + # pd_op.matmul: (-1x49x512xf32) <- (-1x49x512xf32, 512x512xf32) + matmul_99 = paddle._C_ops.matmul(reshape_171, parameter_62, False, False) + del parameter_62 + + # pd_op.add: (-1x49x512xf32) <- (-1x49x512xf32, 512xf32) + add_136 = paddle._C_ops.add(matmul_99, parameter_61) + del parameter_61 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x49x512xf32, 4xi64) + reshape_172 = paddle._C_ops.reshape(add_136, full_int_array_38) + + # pd_op.reshape: (-1x2x2x7x7x512xf32) <- (-1x7x7x512xf32, 6xi64) + reshape_332 = paddle._C_ops.reshape(reshape_172, full_int_array_40) + + # pd_op.transpose: (-1x2x7x2x7x512xf32) <- (-1x2x2x7x7x512xf32) + transpose_120 = paddle._C_ops.transpose(reshape_332, [0, 1, 3, 2, 4, 5]) + del reshape_332 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x2x7x2x7x512xf32, 4xi64) + reshape_173 = paddle._C_ops.reshape(transpose_120, full_int_array_41) + + # pd_op.roll: (-1x14x14x512xf32) <- (-1x14x14x512xf32, 2xi64) + roll_19 = paddle._C_ops.roll(reshape_173, full_int_array_3, [1, 2]) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_163 = [slice_172, full_56, full_44] + del slice_172 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_161 = paddle._C_ops.stack(combine_163, 0) + del combine_163 + + # pd_op.reshape: (-1x196x512xf32) <- (-1x14x14x512xf32, 3xi64) + reshape_174 = paddle._C_ops.reshape(roll_19, stack_161) + del stack_161 + + # pd_op.full: (xf32) <- () + full_20 = paddle._C_ops.full( + [], + float("0.586957"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_215 = full_20 + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_114 = paddle._C_ops.shape64(reshape_174) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_178 = paddle._C_ops.slice( + shape64_114, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_114 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_164 = [slice_178, full_40, full_40] + del slice_178 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_162 = paddle._C_ops.stack(combine_164, 0) + del combine_164 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_36 = paddle._C_ops.uniform( + stack_162, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_162 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_216 = paddle._C_ops.add(full_20, uniform_36) + del uniform_36 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_36 = paddle._C_ops.floor(add_216) + del add_216 + + # pd_op.divide: (-1x196x512xf32) <- (-1x196x512xf32, xf32) + divide_36 = paddle._C_ops.divide(reshape_174, full_20) + + # pd_op.multiply: (-1x196x512xf32) <- (-1x196x512xf32, -1x1x1xf32) + multiply_36 = paddle._C_ops.multiply(divide_36, floor_36) + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_137 = paddle._C_ops.add(add_132, multiply_36) + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_126, layer_norm_127, layer_norm_128 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_137, parameter_60, parameter_59, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_59, parameter_60 + + # pd_op.matmul: (-1x196x2048xf32) <- (-1x196x512xf32, 512x2048xf32) + matmul_100 = paddle._C_ops.matmul(layer_norm_126, parameter_58, False, False) + del parameter_58 + + # pd_op.add: (-1x196x2048xf32) <- (-1x196x2048xf32, 2048xf32) + add_138 = paddle._C_ops.add(matmul_100, parameter_57) + del parameter_57 + + # pd_op.gelu: (-1x196x2048xf32) <- (-1x196x2048xf32) + gelu_19 = paddle._C_ops.gelu(add_138, False) + + # pd_op.matmul: (-1x196x512xf32) <- (-1x196x2048xf32, 2048x512xf32) + matmul_101 = paddle._C_ops.matmul(gelu_19, parameter_56, False, False) + del parameter_56 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, 512xf32) + add_139 = paddle._C_ops.add(matmul_101, parameter_55) + del parameter_55 + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_115 = paddle._C_ops.shape64(add_139) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_179 = paddle._C_ops.slice( + shape64_115, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_115 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_165 = [slice_179, full_40, full_40] + del slice_179 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_163 = paddle._C_ops.stack(combine_165, 0) + del combine_165 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_37 = paddle._C_ops.uniform( + stack_163, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_163 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_217 = paddle._C_ops.add(full_20, uniform_37) + del uniform_37 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_37 = paddle._C_ops.floor(add_217) + del add_217 + + # pd_op.divide: (-1x196x512xf32) <- (-1x196x512xf32, xf32) + divide_37 = paddle._C_ops.divide(add_139, full_20) + + # pd_op.multiply: (-1x196x512xf32) <- (-1x196x512xf32, -1x1x1xf32) + multiply_37 = paddle._C_ops.multiply(divide_37, floor_37) + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_140 = paddle._C_ops.add(add_137, multiply_37) + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_116 = paddle._C_ops.shape64(add_140) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_180 = paddle._C_ops.slice( + shape64_116, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_116 + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_129, layer_norm_130, layer_norm_131 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_140, parameter_54, parameter_53, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_53, parameter_54 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_166 = [slice_180, full_54, full_54, full_44] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_164 = paddle._C_ops.stack(combine_166, 0) + del combine_166 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x196x512xf32, 4xi64) + reshape_175 = paddle._C_ops.reshape(layer_norm_129, stack_164) + del stack_164 + + # pd_op.shape64: (4xi64) <- (-1x14x14x512xf32) + shape64_117 = paddle._C_ops.shape64(reshape_175) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_181 = paddle._C_ops.slice( + shape64_117, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_117 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_167 = [slice_181, full_55, full_28, full_55, full_28, full_44] + del slice_181 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_165 = paddle._C_ops.stack(combine_167, 0) + del combine_167 + + # pd_op.reshape: (-1x2x7x2x7x512xf32) <- (-1x14x14x512xf32, 6xi64) + reshape_333 = paddle._C_ops.reshape(reshape_175, stack_165) + del stack_165 + + # pd_op.transpose: (-1x2x2x7x7x512xf32) <- (-1x2x7x2x7x512xf32) + transpose_121 = paddle._C_ops.transpose(reshape_333, [0, 1, 3, 2, 4, 5]) + del reshape_333 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x2x2x7x7x512xf32, 4xi64) + reshape_176 = paddle._C_ops.reshape(transpose_121, full_int_array_38) + + # pd_op.reshape: (-1x49x512xf32) <- (-1x7x7x512xf32, 3xi64) + reshape_177 = paddle._C_ops.reshape(reshape_176, full_int_array_39) + + # pd_op.shape64: (3xi64) <- (-1x49x512xf32) + shape64_118 = paddle._C_ops.shape64(reshape_177) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_182 = paddle._C_ops.slice( + shape64_118, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_118 + + # pd_op.matmul: (-1x49x1536xf32) <- (-1x49x512xf32, 512x1536xf32) + matmul_102 = paddle._C_ops.matmul(reshape_177, parameter_52, False, False) + del parameter_52 + + # pd_op.add: (-1x49x1536xf32) <- (-1x49x1536xf32, 1536xf32) + add_141 = paddle._C_ops.add(matmul_102, parameter_51) + del parameter_51 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_168 = [slice_182, full_29, full_30, full_52, full_32] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_166 = paddle._C_ops.stack(combine_168, 0) + del combine_168 + + # pd_op.reshape: (-1x49x3x16x32xf32) <- (-1x49x1536xf32, 5xi64) + reshape_334 = paddle._C_ops.reshape(add_141, stack_166) + del stack_166 + + # pd_op.transpose: (3x-1x16x49x32xf32) <- (-1x49x3x16x32xf32) + transpose_122 = paddle._C_ops.transpose(reshape_334, [2, 0, 3, 1, 4]) + del reshape_334 + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_183 = paddle._C_ops.slice( + transpose_122, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_184 = paddle._C_ops.slice( + transpose_122, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_20 = paddle._C_ops.slice( + transpose_122, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x16x49x32xf32) <- (-1x16x49x32xf32, 1xf32) + scale_20 = paddle._C_ops.scale(slice_183, full_0, float("0"), True) + del slice_183 + + # pd_op.transpose: (-1x16x32x49xf32) <- (-1x16x49x32xf32) + transpose_123 = paddle._C_ops.transpose(slice_184, [0, 1, 3, 2]) + del slice_184 + + # pd_op.matmul: (-1x16x49x49xf32) <- (-1x16x49x32xf32, -1x16x32x49xf32) + matmul_103 = paddle._C_ops.matmul(scale_20, transpose_123, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_178 = paddle._C_ops.reshape(data_41, full_int_array_12) + del data_41 + + # pd_op.index_select: (2401x16xf32) <- (169x16xf32, 2401xi64) + index_select_20 = paddle._C_ops.index_select(data_42, reshape_178, 0) + del data_42 + + # pd_op.reshape: (49x49x16xf32) <- (2401x16xf32, 3xi64) + reshape_335 = paddle._C_ops.reshape(index_select_20, full_int_array_13) + + # pd_op.transpose: (16x49x49xf32) <- (49x49x16xf32) + transpose_124 = paddle._C_ops.transpose(reshape_335, [2, 0, 1]) + del reshape_335 + + # pd_op.unsqueeze: (1x16x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_30 = paddle._C_ops.unsqueeze(transpose_124, full_int_array_7) + + # pd_op.add: (-1x16x49x49xf32) <- (-1x16x49x49xf32, 1x16x49x49xf32) + add_218 = paddle._C_ops.add(matmul_103, unsqueeze_30) + + # pd_op.softmax: (-1x16x49x49xf32) <- (-1x16x49x49xf32) + softmax_20 = paddle._C_ops.softmax(add_218, -1) + del add_218 + + # pd_op.matmul: (-1x16x49x32xf32) <- (-1x16x49x49xf32, -1x16x49x32xf32) + matmul_144 = paddle._C_ops.matmul(softmax_20, slice_20, False, False) + + # pd_op.transpose: (-1x49x16x32xf32) <- (-1x16x49x32xf32) + transpose_125 = paddle._C_ops.transpose(matmul_144, [0, 2, 1, 3]) + del matmul_144 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_169 = [slice_182, full_29, full_44] + del slice_182 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_167 = paddle._C_ops.stack(combine_169, 0) + del combine_169 + + # pd_op.reshape: (-1x49x512xf32) <- (-1x49x16x32xf32, 3xi64) + reshape_179 = paddle._C_ops.reshape(transpose_125, stack_167) + del stack_167 + + # pd_op.matmul: (-1x49x512xf32) <- (-1x49x512xf32, 512x512xf32) + matmul_104 = paddle._C_ops.matmul(reshape_179, parameter_50, False, False) + del parameter_50 + + # pd_op.add: (-1x49x512xf32) <- (-1x49x512xf32, 512xf32) + add_142 = paddle._C_ops.add(matmul_104, parameter_49) + del parameter_49 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x49x512xf32, 4xi64) + reshape_180 = paddle._C_ops.reshape(add_142, full_int_array_38) + + # pd_op.reshape: (-1x2x2x7x7x512xf32) <- (-1x7x7x512xf32, 6xi64) + reshape_336 = paddle._C_ops.reshape(reshape_180, full_int_array_40) + + # pd_op.transpose: (-1x2x7x2x7x512xf32) <- (-1x2x2x7x7x512xf32) + transpose_126 = paddle._C_ops.transpose(reshape_336, [0, 1, 3, 2, 4, 5]) + del reshape_336 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x2x7x2x7x512xf32, 4xi64) + reshape_181 = paddle._C_ops.reshape(transpose_126, full_int_array_41) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_170 = [slice_180, full_56, full_44] + del slice_180 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_168 = paddle._C_ops.stack(combine_170, 0) + del combine_170 + + # pd_op.reshape: (-1x196x512xf32) <- (-1x14x14x512xf32, 3xi64) + reshape_182 = paddle._C_ops.reshape(reshape_181, stack_168) + del stack_168 + + # pd_op.full: (xf32) <- () + full_21 = paddle._C_ops.full( + [], + float("0.565217"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_224 = full_21 + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_119 = paddle._C_ops.shape64(reshape_182) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_185 = paddle._C_ops.slice( + shape64_119, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_119 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_171 = [slice_185, full_40, full_40] + del slice_185 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_169 = paddle._C_ops.stack(combine_171, 0) + del combine_171 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_38 = paddle._C_ops.uniform( + stack_169, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_169 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_219 = paddle._C_ops.add(full_21, uniform_38) + del uniform_38 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_38 = paddle._C_ops.floor(add_219) + del add_219 + + # pd_op.divide: (-1x196x512xf32) <- (-1x196x512xf32, xf32) + divide_38 = paddle._C_ops.divide(reshape_182, full_21) + + # pd_op.multiply: (-1x196x512xf32) <- (-1x196x512xf32, -1x1x1xf32) + multiply_38 = paddle._C_ops.multiply(divide_38, floor_38) + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_143 = paddle._C_ops.add(add_140, multiply_38) + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_132, layer_norm_133, layer_norm_134 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_143, parameter_48, parameter_47, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_47, parameter_48 + + # pd_op.matmul: (-1x196x2048xf32) <- (-1x196x512xf32, 512x2048xf32) + matmul_105 = paddle._C_ops.matmul(layer_norm_132, parameter_46, False, False) + del parameter_46 + + # pd_op.add: (-1x196x2048xf32) <- (-1x196x2048xf32, 2048xf32) + add_144 = paddle._C_ops.add(matmul_105, parameter_45) + del parameter_45 + + # pd_op.gelu: (-1x196x2048xf32) <- (-1x196x2048xf32) + gelu_20 = paddle._C_ops.gelu(add_144, False) + + # pd_op.matmul: (-1x196x512xf32) <- (-1x196x2048xf32, 2048x512xf32) + matmul_106 = paddle._C_ops.matmul(gelu_20, parameter_44, False, False) + del parameter_44 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, 512xf32) + add_145 = paddle._C_ops.add(matmul_106, parameter_43) + del parameter_43 + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_120 = paddle._C_ops.shape64(add_145) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_186 = paddle._C_ops.slice( + shape64_120, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_120 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_172 = [slice_186, full_40, full_40] + del slice_186 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_170 = paddle._C_ops.stack(combine_172, 0) + del combine_172 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_39 = paddle._C_ops.uniform( + stack_170, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_170 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_220 = paddle._C_ops.add(full_21, uniform_39) + del uniform_39 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_39 = paddle._C_ops.floor(add_220) + del add_220 + + # pd_op.divide: (-1x196x512xf32) <- (-1x196x512xf32, xf32) + divide_39 = paddle._C_ops.divide(add_145, full_21) + + # pd_op.multiply: (-1x196x512xf32) <- (-1x196x512xf32, -1x1x1xf32) + multiply_39 = paddle._C_ops.multiply(divide_39, floor_39) + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_146 = paddle._C_ops.add(add_143, multiply_39) + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_121 = paddle._C_ops.shape64(add_146) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_187 = paddle._C_ops.slice( + shape64_121, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_121 + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_135, layer_norm_136, layer_norm_137 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_146, parameter_42, parameter_41, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_41, parameter_42 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_173 = [slice_187, full_54, full_54, full_44] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_171 = paddle._C_ops.stack(combine_173, 0) + del combine_173 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x196x512xf32, 4xi64) + reshape_183 = paddle._C_ops.reshape(layer_norm_135, stack_171) + del stack_171 + + # pd_op.shape64: (4xi64) <- (-1x14x14x512xf32) + shape64_122 = paddle._C_ops.shape64(reshape_183) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_188 = paddle._C_ops.slice( + shape64_122, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_122 + + # pd_op.roll: (-1x14x14x512xf32) <- (-1x14x14x512xf32, 2xi64) + roll_20 = paddle._C_ops.roll(reshape_183, full_int_array_2, [1, 2]) + + # pd_op.shape64: (4xi64) <- (-1x14x14x512xf32) + shape64_123 = paddle._C_ops.shape64(roll_20) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_189 = paddle._C_ops.slice( + shape64_123, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_123 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_174 = [slice_189, full_55, full_28, full_55, full_28, full_44] + del full_55, slice_189 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_172 = paddle._C_ops.stack(combine_174, 0) + del combine_174 + + # pd_op.reshape: (-1x2x7x2x7x512xf32) <- (-1x14x14x512xf32, 6xi64) + reshape_337 = paddle._C_ops.reshape(roll_20, stack_172) + del stack_172 + + # pd_op.transpose: (-1x2x2x7x7x512xf32) <- (-1x2x7x2x7x512xf32) + transpose_127 = paddle._C_ops.transpose(reshape_337, [0, 1, 3, 2, 4, 5]) + del reshape_337 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x2x2x7x7x512xf32, 4xi64) + reshape_184 = paddle._C_ops.reshape(transpose_127, full_int_array_38) + + # pd_op.reshape: (-1x49x512xf32) <- (-1x7x7x512xf32, 3xi64) + reshape_185 = paddle._C_ops.reshape(reshape_184, full_int_array_39) + del full_int_array_39 + + # pd_op.full: (1x14x14x1xf32) <- () + full_68 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__92 = paddle._C_ops.set_value_( + full_68, + full_int_array_16, + full_int_array_17, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_68 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__93 = paddle._C_ops.set_value_( + set_value__92, + full_int_array_19, + full_int_array_20, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__92 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__94 = paddle._C_ops.set_value_( + set_value__93, + full_int_array_21, + full_int_array_22, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__93 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__95 = paddle._C_ops.set_value_( + set_value__94, + full_int_array_23, + full_int_array_24, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__94 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__96 = paddle._C_ops.set_value_( + set_value__95, + full_int_array_17, + full_int_array_2, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__95 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__97 = paddle._C_ops.set_value_( + set_value__96, + full_int_array_20, + full_int_array_25, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__96 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__98 = paddle._C_ops.set_value_( + set_value__97, + full_int_array_26, + full_int_array_27, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__97 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__99 = paddle._C_ops.set_value_( + set_value__98, + full_int_array_24, + full_int_array_28, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__98 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__10 = paddle._C_ops.set_value_( + set_value__99, + full_int_array_2, + full_int_array_29, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__99 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_338 = paddle._C_ops.reshape(set_value__10, full_int_array_42) + del full_int_array_42 + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_156 = paddle._C_ops.transpose(reshape_338, [0, 1, 3, 2, 4, 5]) + del reshape_338 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_339 = paddle._C_ops.reshape(transpose_156, full_int_array_31) + del transpose_156 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_340 = paddle._C_ops.reshape(reshape_339, full_int_array_32) + del reshape_339 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_67 = paddle._C_ops.unsqueeze(reshape_340, full_int_array_8) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_68 = paddle._C_ops.unsqueeze(reshape_340, full_int_array_0) + del reshape_340 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_10 = paddle._C_ops.subtract(unsqueeze_67, unsqueeze_68) + del unsqueeze_67, unsqueeze_68 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_10 = paddle._C_ops.not_equal(subtract_10, full_35) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_20 = paddle._C_ops.where(not_equal_10, full_58, subtract_10) + del full_58, not_equal_10, subtract_10 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_10 = paddle._C_ops.equal(where_20, full_35) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_21 = paddle._C_ops.where(equal_10, full_59, where_20) + del equal_10, full_59, where_20 + + # pd_op.shape64: (3xi64) <- (-1x49x512xf32) + shape64_124 = paddle._C_ops.shape64(reshape_185) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_190 = paddle._C_ops.slice( + shape64_124, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_124 + + # pd_op.matmul: (-1x49x1536xf32) <- (-1x49x512xf32, 512x1536xf32) + matmul_107 = paddle._C_ops.matmul(reshape_185, parameter_40, False, False) + del parameter_40 + + # pd_op.add: (-1x49x1536xf32) <- (-1x49x1536xf32, 1536xf32) + add_147 = paddle._C_ops.add(matmul_107, parameter_39) + del parameter_39 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_175 = [slice_190, full_29, full_30, full_52, full_32] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_173 = paddle._C_ops.stack(combine_175, 0) + del combine_175 + + # pd_op.reshape: (-1x49x3x16x32xf32) <- (-1x49x1536xf32, 5xi64) + reshape_341 = paddle._C_ops.reshape(add_147, stack_173) + del stack_173 + + # pd_op.transpose: (3x-1x16x49x32xf32) <- (-1x49x3x16x32xf32) + transpose_128 = paddle._C_ops.transpose(reshape_341, [2, 0, 3, 1, 4]) + del reshape_341 + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_191 = paddle._C_ops.slice( + transpose_128, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_192 = paddle._C_ops.slice( + transpose_128, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_21 = paddle._C_ops.slice( + transpose_128, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x16x49x32xf32) <- (-1x16x49x32xf32, 1xf32) + scale_21 = paddle._C_ops.scale(slice_191, full_0, float("0"), True) + del slice_191 + + # pd_op.transpose: (-1x16x32x49xf32) <- (-1x16x49x32xf32) + transpose_129 = paddle._C_ops.transpose(slice_192, [0, 1, 3, 2]) + del slice_192 + + # pd_op.matmul: (-1x16x49x49xf32) <- (-1x16x49x32xf32, -1x16x32x49xf32) + matmul_108 = paddle._C_ops.matmul(scale_21, transpose_129, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_186 = paddle._C_ops.reshape(data_43, full_int_array_12) + del data_43 + + # pd_op.index_select: (2401x16xf32) <- (169x16xf32, 2401xi64) + index_select_21 = paddle._C_ops.index_select(data_44, reshape_186, 0) + del data_44 + + # pd_op.reshape: (49x49x16xf32) <- (2401x16xf32, 3xi64) + reshape_342 = paddle._C_ops.reshape(index_select_21, full_int_array_13) + + # pd_op.transpose: (16x49x49xf32) <- (49x49x16xf32) + transpose_130 = paddle._C_ops.transpose(reshape_342, [2, 0, 1]) + del reshape_342 + + # pd_op.unsqueeze: (1x16x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_31 = paddle._C_ops.unsqueeze(transpose_130, full_int_array_7) + + # pd_op.add: (-1x16x49x49xf32) <- (-1x16x49x49xf32, 1x16x49x49xf32) + add_148 = paddle._C_ops.add(matmul_108, unsqueeze_31) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_10 = paddle._C_ops.floor_divide(slice_190, full_60) + del full_60 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_176 = [floor_divide_10, full_31, full_52, full_29, full_29] + del floor_divide_10, full_31 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_174 = paddle._C_ops.stack(combine_176, 0) + del combine_176 + + # pd_op.reshape: (-1x4x16x49x49xf32) <- (-1x16x49x49xf32, 5xi64) + reshape_187 = paddle._C_ops.reshape(add_148, stack_174) + del stack_174 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_69 = paddle._C_ops.unsqueeze(where_21, full_int_array_8) + del where_21 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_32 = paddle._C_ops.unsqueeze(unsqueeze_69, full_int_array_7) + del unsqueeze_69 + + # pd_op.add: (-1x4x16x49x49xf32) <- (-1x4x16x49x49xf32, 1x4x1x49x49xf32) + add_149 = paddle._C_ops.add(reshape_187, unsqueeze_32) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_177 = [slice_190, full_52, full_29, full_29] + del full_52 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_175 = paddle._C_ops.stack(combine_177, 0) + del combine_177 + + # pd_op.reshape: (-1x16x49x49xf32) <- (-1x4x16x49x49xf32, 4xi64) + reshape_343 = paddle._C_ops.reshape(add_149, stack_175) + del stack_175 + + # pd_op.softmax: (-1x16x49x49xf32) <- (-1x16x49x49xf32) + softmax_21 = paddle._C_ops.softmax(reshape_343, -1) + del reshape_343 + + # pd_op.matmul: (-1x16x49x32xf32) <- (-1x16x49x49xf32, -1x16x49x32xf32) + matmul_145 = paddle._C_ops.matmul(softmax_21, slice_21, False, False) + + # pd_op.transpose: (-1x49x16x32xf32) <- (-1x16x49x32xf32) + transpose_131 = paddle._C_ops.transpose(matmul_145, [0, 2, 1, 3]) + del matmul_145 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_178 = [slice_190, full_29, full_44] + del slice_190 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_176 = paddle._C_ops.stack(combine_178, 0) + del combine_178 + + # pd_op.reshape: (-1x49x512xf32) <- (-1x49x16x32xf32, 3xi64) + reshape_188 = paddle._C_ops.reshape(transpose_131, stack_176) + del stack_176 + + # pd_op.matmul: (-1x49x512xf32) <- (-1x49x512xf32, 512x512xf32) + matmul_109 = paddle._C_ops.matmul(reshape_188, parameter_38, False, False) + del parameter_38 + + # pd_op.add: (-1x49x512xf32) <- (-1x49x512xf32, 512xf32) + add_150 = paddle._C_ops.add(matmul_109, parameter_37) + del parameter_37 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x49x512xf32, 4xi64) + reshape_189 = paddle._C_ops.reshape(add_150, full_int_array_38) + del full_int_array_38 + + # pd_op.reshape: (-1x2x2x7x7x512xf32) <- (-1x7x7x512xf32, 6xi64) + reshape_344 = paddle._C_ops.reshape(reshape_189, full_int_array_40) + del full_int_array_40 + + # pd_op.transpose: (-1x2x7x2x7x512xf32) <- (-1x2x2x7x7x512xf32) + transpose_132 = paddle._C_ops.transpose(reshape_344, [0, 1, 3, 2, 4, 5]) + del reshape_344 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x2x7x2x7x512xf32, 4xi64) + reshape_190 = paddle._C_ops.reshape(transpose_132, full_int_array_41) + del full_int_array_41 + + # pd_op.roll: (-1x14x14x512xf32) <- (-1x14x14x512xf32, 2xi64) + roll_21 = paddle._C_ops.roll(reshape_190, full_int_array_3, [1, 2]) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_179 = [slice_187, full_56, full_44] + del full_56, slice_187 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_177 = paddle._C_ops.stack(combine_179, 0) + del combine_179 + + # pd_op.reshape: (-1x196x512xf32) <- (-1x14x14x512xf32, 3xi64) + reshape_191 = paddle._C_ops.reshape(roll_21, stack_177) + del stack_177 + + # pd_op.full: (xf32) <- () + full_22 = paddle._C_ops.full( + [], + float("0.543478"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_235 = full_22 + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_125 = paddle._C_ops.shape64(reshape_191) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_193 = paddle._C_ops.slice( + shape64_125, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_125 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_180 = [slice_193, full_40, full_40] + del slice_193 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_178 = paddle._C_ops.stack(combine_180, 0) + del combine_180 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_40 = paddle._C_ops.uniform( + stack_178, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_178 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_221 = paddle._C_ops.add(full_22, uniform_40) + del uniform_40 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_40 = paddle._C_ops.floor(add_221) + del add_221 + + # pd_op.divide: (-1x196x512xf32) <- (-1x196x512xf32, xf32) + divide_40 = paddle._C_ops.divide(reshape_191, full_22) + + # pd_op.multiply: (-1x196x512xf32) <- (-1x196x512xf32, -1x1x1xf32) + multiply_40 = paddle._C_ops.multiply(divide_40, floor_40) + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_151 = paddle._C_ops.add(add_146, multiply_40) + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_138, layer_norm_139, layer_norm_140 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_151, parameter_36, parameter_35, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_35, parameter_36 + + # pd_op.matmul: (-1x196x2048xf32) <- (-1x196x512xf32, 512x2048xf32) + matmul_110 = paddle._C_ops.matmul(layer_norm_138, parameter_34, False, False) + del parameter_34 + + # pd_op.add: (-1x196x2048xf32) <- (-1x196x2048xf32, 2048xf32) + add_152 = paddle._C_ops.add(matmul_110, parameter_33) + del parameter_33 + + # pd_op.gelu: (-1x196x2048xf32) <- (-1x196x2048xf32) + gelu_21 = paddle._C_ops.gelu(add_152, False) + + # pd_op.matmul: (-1x196x512xf32) <- (-1x196x2048xf32, 2048x512xf32) + matmul_111 = paddle._C_ops.matmul(gelu_21, parameter_32, False, False) + del parameter_32 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, 512xf32) + add_153 = paddle._C_ops.add(matmul_111, parameter_31) + del parameter_31 + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_126 = paddle._C_ops.shape64(add_153) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_194 = paddle._C_ops.slice( + shape64_126, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_126 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_181 = [slice_194, full_40, full_40] + del slice_194 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_179 = paddle._C_ops.stack(combine_181, 0) + del combine_181 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_41 = paddle._C_ops.uniform( + stack_179, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_179 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_222 = paddle._C_ops.add(full_22, uniform_41) + del uniform_41 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_41 = paddle._C_ops.floor(add_222) + del add_222 + + # pd_op.divide: (-1x196x512xf32) <- (-1x196x512xf32, xf32) + divide_41 = paddle._C_ops.divide(add_153, full_22) + + # pd_op.multiply: (-1x196x512xf32) <- (-1x196x512xf32, -1x1x1xf32) + multiply_41 = paddle._C_ops.multiply(divide_41, floor_41) + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_154 = paddle._C_ops.add(add_151, multiply_41) + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_127 = paddle._C_ops.shape64(add_154) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_195 = paddle._C_ops.slice( + shape64_127, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_127 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_182 = [slice_195, full_54, full_54, full_44] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_180 = paddle._C_ops.stack(combine_182, 0) + del combine_182 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x196x512xf32, 4xi64) + reshape_192 = paddle._C_ops.reshape(add_154, stack_180) + del stack_180 + + # pd_op.strided_slice: (-1x7x7x512xf32) <- (-1x14x14x512xf32, 2xi64, 2xi64, 2xi64) + strided_slice_8 = paddle._C_ops.strided_slice( + reshape_192, [1, 2], full_int_array_16, full_int_array_29, full_int_array_4 + ) + + # pd_op.strided_slice: (-1x7x7x512xf32) <- (-1x14x14x512xf32, 2xi64, 2xi64, 2xi64) + strided_slice_9 = paddle._C_ops.strided_slice( + reshape_192, [1, 2], full_int_array_5, full_int_array_29, full_int_array_4 + ) + + # pd_op.strided_slice: (-1x7x7x512xf32) <- (-1x14x14x512xf32, 2xi64, 2xi64, 2xi64) + strided_slice_10 = paddle._C_ops.strided_slice( + reshape_192, [1, 2], full_int_array_6, full_int_array_29, full_int_array_4 + ) + + # pd_op.strided_slice: (-1x7x7x512xf32) <- (-1x14x14x512xf32, 2xi64, 2xi64, 2xi64) + strided_slice_11 = paddle._C_ops.strided_slice( + reshape_192, [1, 2], full_int_array_18, full_int_array_29, full_int_array_4 + ) + + # pd_op.shape64: (4xi64) <- (-1x14x14x512xf32) + shape64_128 = paddle._C_ops.shape64(reshape_192) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_196 = paddle._C_ops.slice( + shape64_128, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_128 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_183 = [slice_196, full_54, full_54, full_44] + del full_44, full_54, slice_196 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_181 = paddle._C_ops.stack(combine_183, 0) + del combine_183 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x14x14x512xf32, 4xi64) + reshape_345 = paddle._C_ops.reshape(reshape_192, stack_181) + del stack_181 + + # builtin.combine: ([-1x7x7x512xf32, -1x7x7x512xf32, -1x7x7x512xf32, -1x7x7x512xf32]) <- (-1x7x7x512xf32, -1x7x7x512xf32, -1x7x7x512xf32, -1x7x7x512xf32) + combine_184 = [ + strided_slice_8, + strided_slice_9, + strided_slice_10, + strided_slice_11, + ] + + # pd_op.concat: (-1x7x7x2048xf32) <- ([-1x7x7x512xf32, -1x7x7x512xf32, -1x7x7x512xf32, -1x7x7x512xf32], 1xi32) + concat_2 = paddle._C_ops.concat(combine_184, full_2) + del combine_184 + + # pd_op.full: (xi64) <- () + full_69 = paddle._C_ops.full( + [], float("2048"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_185 = [slice_195, full_43, full_69] + del full_43, full_69, slice_195 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_182 = paddle._C_ops.stack(combine_185, 0) + del combine_185 + + # pd_op.reshape: (-1x-1x2048xf32) <- (-1x7x7x2048xf32, 3xi64) + reshape_193 = paddle._C_ops.reshape(concat_2, stack_182) + del stack_182 + + # pd_op.layer_norm: (-1x-1x2048xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x2048xf32, 2048xf32, 2048xf32) + layer_norm_141, layer_norm_142, layer_norm_143 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + reshape_193, parameter_30, parameter_29, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_29, parameter_30 + + # pd_op.matmul: (-1x-1x1024xf32) <- (-1x-1x2048xf32, 2048x1024xf32) + matmul_112 = paddle._C_ops.matmul(layer_norm_141, parameter_28, False, False) + del parameter_28 + + # pd_op.shape64: (3xi64) <- (-1x-1x1024xf32) + shape64_129 = paddle._C_ops.shape64(matmul_112) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_197 = paddle._C_ops.slice( + shape64_129, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_129 + + # pd_op.shape64: (3xi64) <- (-1x-1x1024xf32) + shape64_130 = paddle._C_ops.shape64(matmul_112) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_198 = paddle._C_ops.slice( + shape64_130, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + del shape64_130 + + # pd_op.layer_norm: (-1x-1x1024xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x1024xf32, 1024xf32, 1024xf32) + layer_norm_144, layer_norm_145, layer_norm_146 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + matmul_112, parameter_27, parameter_26, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_26, parameter_27 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_186 = [slice_197, full_28, full_28, full_53] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_183 = paddle._C_ops.stack(combine_186, 0) + del combine_186 + + # pd_op.reshape: (-1x7x7x1024xf32) <- (-1x-1x1024xf32, 4xi64) + reshape_194 = paddle._C_ops.reshape(layer_norm_144, stack_183) + del stack_183 + + # pd_op.shape64: (4xi64) <- (-1x7x7x1024xf32) + shape64_131 = paddle._C_ops.shape64(reshape_194) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_199 = paddle._C_ops.slice( + shape64_131, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_131 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_187 = [slice_199, full_40, full_28, full_40, full_28, full_53] + del slice_199 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_184 = paddle._C_ops.stack(combine_187, 0) + del combine_187 + + # pd_op.reshape: (-1x1x7x1x7x1024xf32) <- (-1x7x7x1024xf32, 6xi64) + reshape_346 = paddle._C_ops.reshape(reshape_194, stack_184) + del stack_184 + + # pd_op.transpose: (-1x1x1x7x7x1024xf32) <- (-1x1x7x1x7x1024xf32) + transpose_133 = paddle._C_ops.transpose(reshape_346, [0, 1, 3, 2, 4, 5]) + del reshape_346 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_43 = [-1, 7, 7, 1024] + + # pd_op.reshape: (-1x7x7x1024xf32) <- (-1x1x1x7x7x1024xf32, 4xi64) + reshape_195 = paddle._C_ops.reshape(transpose_133, full_int_array_43) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_44 = [-1, 49, 1024] + + # pd_op.reshape: (-1x49x1024xf32) <- (-1x7x7x1024xf32, 3xi64) + reshape_196 = paddle._C_ops.reshape(reshape_195, full_int_array_44) + + # pd_op.shape64: (3xi64) <- (-1x49x1024xf32) + shape64_132 = paddle._C_ops.shape64(reshape_196) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_200 = paddle._C_ops.slice( + shape64_132, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_132 + + # pd_op.matmul: (-1x49x3072xf32) <- (-1x49x1024xf32, 1024x3072xf32) + matmul_113 = paddle._C_ops.matmul(reshape_196, parameter_25, False, False) + del parameter_25 + + # pd_op.add: (-1x49x3072xf32) <- (-1x49x3072xf32, 3072xf32) + add_155 = paddle._C_ops.add(matmul_113, parameter_24) + del parameter_24 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_188 = [slice_200, full_29, full_30, full_32, full_32] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_185 = paddle._C_ops.stack(combine_188, 0) + del combine_188 + + # pd_op.reshape: (-1x49x3x32x32xf32) <- (-1x49x3072xf32, 5xi64) + reshape_347 = paddle._C_ops.reshape(add_155, stack_185) + del stack_185 + + # pd_op.transpose: (3x-1x32x49x32xf32) <- (-1x49x3x32x32xf32) + transpose_134 = paddle._C_ops.transpose(reshape_347, [2, 0, 3, 1, 4]) + del reshape_347 + + # pd_op.slice: (-1x32x49x32xf32) <- (3x-1x32x49x32xf32, 1xi64, 1xi64) + slice_201 = paddle._C_ops.slice( + transpose_134, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x32x49x32xf32) <- (3x-1x32x49x32xf32, 1xi64, 1xi64) + slice_202 = paddle._C_ops.slice( + transpose_134, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x32x49x32xf32) <- (3x-1x32x49x32xf32, 1xi64, 1xi64) + slice_22 = paddle._C_ops.slice( + transpose_134, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x32x49x32xf32) <- (-1x32x49x32xf32, 1xf32) + scale_22 = paddle._C_ops.scale(slice_201, full_0, float("0"), True) + del slice_201 + + # pd_op.transpose: (-1x32x32x49xf32) <- (-1x32x49x32xf32) + transpose_135 = paddle._C_ops.transpose(slice_202, [0, 1, 3, 2]) + del slice_202 + + # pd_op.matmul: (-1x32x49x49xf32) <- (-1x32x49x32xf32, -1x32x32x49xf32) + matmul_114 = paddle._C_ops.matmul(scale_22, transpose_135, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_197 = paddle._C_ops.reshape(data_45, full_int_array_12) + del data_45 + + # pd_op.index_select: (2401x32xf32) <- (169x32xf32, 2401xi64) + index_select_22 = paddle._C_ops.index_select(data_46, reshape_197, 0) + del data_46 + + # pd_op.reshape: (49x49x32xf32) <- (2401x32xf32, 3xi64) + reshape_348 = paddle._C_ops.reshape(index_select_22, full_int_array_13) + + # pd_op.transpose: (32x49x49xf32) <- (49x49x32xf32) + transpose_136 = paddle._C_ops.transpose(reshape_348, [2, 0, 1]) + del reshape_348 + + # pd_op.unsqueeze: (1x32x49x49xf32) <- (32x49x49xf32, 1xi64) + unsqueeze_33 = paddle._C_ops.unsqueeze(transpose_136, full_int_array_7) + + # pd_op.add: (-1x32x49x49xf32) <- (-1x32x49x49xf32, 1x32x49x49xf32) + add_223 = paddle._C_ops.add(matmul_114, unsqueeze_33) + + # pd_op.softmax: (-1x32x49x49xf32) <- (-1x32x49x49xf32) + softmax_22 = paddle._C_ops.softmax(add_223, -1) + del add_223 + + # pd_op.matmul: (-1x32x49x32xf32) <- (-1x32x49x49xf32, -1x32x49x32xf32) + matmul_146 = paddle._C_ops.matmul(softmax_22, slice_22, False, False) + + # pd_op.transpose: (-1x49x32x32xf32) <- (-1x32x49x32xf32) + transpose_137 = paddle._C_ops.transpose(matmul_146, [0, 2, 1, 3]) + del matmul_146 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_189 = [slice_200, full_29, full_53] + del slice_200 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_186 = paddle._C_ops.stack(combine_189, 0) + del combine_189 + + # pd_op.reshape: (-1x49x1024xf32) <- (-1x49x32x32xf32, 3xi64) + reshape_198 = paddle._C_ops.reshape(transpose_137, stack_186) + del stack_186 + + # pd_op.matmul: (-1x49x1024xf32) <- (-1x49x1024xf32, 1024x1024xf32) + matmul_115 = paddle._C_ops.matmul(reshape_198, parameter_23, False, False) + del parameter_23 + + # pd_op.add: (-1x49x1024xf32) <- (-1x49x1024xf32, 1024xf32) + add_156 = paddle._C_ops.add(matmul_115, parameter_22) + del parameter_22 + + # pd_op.reshape: (-1x7x7x1024xf32) <- (-1x49x1024xf32, 4xi64) + reshape_199 = paddle._C_ops.reshape(add_156, full_int_array_43) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_45 = [-1, 1, 1, 7, 7, 1024] + + # pd_op.reshape: (-1x1x1x7x7x1024xf32) <- (-1x7x7x1024xf32, 6xi64) + reshape_349 = paddle._C_ops.reshape(reshape_199, full_int_array_45) + + # pd_op.transpose: (-1x1x7x1x7x1024xf32) <- (-1x1x1x7x7x1024xf32) + transpose_138 = paddle._C_ops.transpose(reshape_349, [0, 1, 3, 2, 4, 5]) + del reshape_349 + + # pd_op.reshape: (-1x7x7x1024xf32) <- (-1x1x7x1x7x1024xf32, 4xi64) + reshape_200 = paddle._C_ops.reshape(transpose_138, full_int_array_43) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_190 = [slice_197, full_29, full_53] + del slice_197 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_187 = paddle._C_ops.stack(combine_190, 0) + del combine_190 + + # pd_op.reshape: (-1x49x1024xf32) <- (-1x7x7x1024xf32, 3xi64) + reshape_201 = paddle._C_ops.reshape(reshape_200, stack_187) + del stack_187 + + # pd_op.full: (xf32) <- () + full_23 = paddle._C_ops.full( + [], + float("0.521739"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_257 = full_23 + + # pd_op.shape64: (3xi64) <- (-1x49x1024xf32) + shape64_133 = paddle._C_ops.shape64(reshape_201) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_203 = paddle._C_ops.slice( + shape64_133, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_133 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_191 = [slice_203, full_40, full_40] + del slice_203 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_188 = paddle._C_ops.stack(combine_191, 0) + del combine_191 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_42 = paddle._C_ops.uniform( + stack_188, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_188 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_224 = paddle._C_ops.add(full_23, uniform_42) + del uniform_42 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_42 = paddle._C_ops.floor(add_224) + del add_224 + + # pd_op.divide: (-1x49x1024xf32) <- (-1x49x1024xf32, xf32) + divide_42 = paddle._C_ops.divide(reshape_201, full_23) + + # pd_op.multiply: (-1x49x1024xf32) <- (-1x49x1024xf32, -1x1x1xf32) + multiply_42 = paddle._C_ops.multiply(divide_42, floor_42) + + # pd_op.add: (-1x49x1024xf32) <- (-1x-1x1024xf32, -1x49x1024xf32) + add_157 = paddle._C_ops.add(matmul_112, multiply_42) + + # pd_op.layer_norm: (-1x49x1024xf32, -1x49xf32, -1x49xf32) <- (-1x49x1024xf32, 1024xf32, 1024xf32) + layer_norm_147, layer_norm_148, layer_norm_149 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_157, parameter_21, parameter_20, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_20, parameter_21 + + # pd_op.matmul: (-1x49x4096xf32) <- (-1x49x1024xf32, 1024x4096xf32) + matmul_116 = paddle._C_ops.matmul(layer_norm_147, parameter_19, False, False) + del parameter_19 + + # pd_op.add: (-1x49x4096xf32) <- (-1x49x4096xf32, 4096xf32) + add_158 = paddle._C_ops.add(matmul_116, parameter_18) + del parameter_18 + + # pd_op.gelu: (-1x49x4096xf32) <- (-1x49x4096xf32) + gelu_22 = paddle._C_ops.gelu(add_158, False) + + # pd_op.matmul: (-1x49x1024xf32) <- (-1x49x4096xf32, 4096x1024xf32) + matmul_117 = paddle._C_ops.matmul(gelu_22, parameter_17, False, False) + del parameter_17 + + # pd_op.add: (-1x49x1024xf32) <- (-1x49x1024xf32, 1024xf32) + add_159 = paddle._C_ops.add(matmul_117, parameter_16) + del parameter_16 + + # pd_op.shape64: (3xi64) <- (-1x49x1024xf32) + shape64_134 = paddle._C_ops.shape64(add_159) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_204 = paddle._C_ops.slice( + shape64_134, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_134 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_192 = [slice_204, full_40, full_40] + del slice_204 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_189 = paddle._C_ops.stack(combine_192, 0) + del combine_192 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_43 = paddle._C_ops.uniform( + stack_189, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_189 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_225 = paddle._C_ops.add(full_23, uniform_43) + del uniform_43 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_43 = paddle._C_ops.floor(add_225) + del add_225 + + # pd_op.divide: (-1x49x1024xf32) <- (-1x49x1024xf32, xf32) + divide_43 = paddle._C_ops.divide(add_159, full_23) + + # pd_op.multiply: (-1x49x1024xf32) <- (-1x49x1024xf32, -1x1x1xf32) + multiply_43 = paddle._C_ops.multiply(divide_43, floor_43) + + # pd_op.add: (-1x49x1024xf32) <- (-1x49x1024xf32, -1x49x1024xf32) + add_160 = paddle._C_ops.add(add_157, multiply_43) + + # pd_op.shape64: (3xi64) <- (-1x49x1024xf32) + shape64_135 = paddle._C_ops.shape64(add_160) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_205 = paddle._C_ops.slice( + shape64_135, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_135 + + # pd_op.layer_norm: (-1x49x1024xf32, -1x49xf32, -1x49xf32) <- (-1x49x1024xf32, 1024xf32, 1024xf32) + layer_norm_150, layer_norm_151, layer_norm_152 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_160, parameter_15, parameter_14, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_14, parameter_15 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_193 = [slice_205, full_28, full_28, full_53] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_190 = paddle._C_ops.stack(combine_193, 0) + del combine_193 + + # pd_op.reshape: (-1x7x7x1024xf32) <- (-1x49x1024xf32, 4xi64) + reshape_202 = paddle._C_ops.reshape(layer_norm_150, stack_190) + del stack_190 + + # pd_op.shape64: (4xi64) <- (-1x7x7x1024xf32) + shape64_136 = paddle._C_ops.shape64(reshape_202) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_206 = paddle._C_ops.slice( + shape64_136, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_136 + + # pd_op.roll: (-1x7x7x1024xf32) <- (-1x7x7x1024xf32, 2xi64) + roll_22 = paddle._C_ops.roll(reshape_202, full_int_array_2, [1, 2]) + + # pd_op.shape64: (4xi64) <- (-1x7x7x1024xf32) + shape64_137 = paddle._C_ops.shape64(roll_22) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_207 = paddle._C_ops.slice( + shape64_137, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_137 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_194 = [slice_207, full_40, full_28, full_40, full_28, full_53] + del full_28, slice_207 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_191 = paddle._C_ops.stack(combine_194, 0) + del combine_194 + + # pd_op.reshape: (-1x1x7x1x7x1024xf32) <- (-1x7x7x1024xf32, 6xi64) + reshape_350 = paddle._C_ops.reshape(roll_22, stack_191) + del stack_191 + + # pd_op.transpose: (-1x1x1x7x7x1024xf32) <- (-1x1x7x1x7x1024xf32) + transpose_139 = paddle._C_ops.transpose(reshape_350, [0, 1, 3, 2, 4, 5]) + del reshape_350 + + # pd_op.reshape: (-1x7x7x1024xf32) <- (-1x1x1x7x7x1024xf32, 4xi64) + reshape_203 = paddle._C_ops.reshape(transpose_139, full_int_array_43) + + # pd_op.reshape: (-1x49x1024xf32) <- (-1x7x7x1024xf32, 3xi64) + reshape_204 = paddle._C_ops.reshape(reshape_203, full_int_array_44) + del full_int_array_44 + + # pd_op.full: (1x7x7x1xf32) <- () + full_70 = paddle._C_ops.full( + [1, 7, 7, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__100 = paddle._C_ops.set_value_( + full_70, + full_int_array_16, + full_int_array_17, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_70, full_int_array_16 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__101 = paddle._C_ops.set_value_( + set_value__100, + full_int_array_19, + full_int_array_20, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del full_int_array_19, set_value__100 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__102 = paddle._C_ops.set_value_( + set_value__101, + full_int_array_21, + full_int_array_22, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del full_int_array_21, full_int_array_22, set_value__101 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__103 = paddle._C_ops.set_value_( + set_value__102, + full_int_array_23, + full_int_array_24, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del full_int_array_23, set_value__102 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__104 = paddle._C_ops.set_value_( + set_value__103, + full_int_array_17, + full_int_array_2, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del full_int_array_17, set_value__103 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__105 = paddle._C_ops.set_value_( + set_value__104, + full_int_array_20, + full_int_array_25, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del full_int_array_20, full_int_array_25, set_value__104 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__106 = paddle._C_ops.set_value_( + set_value__105, + full_int_array_26, + full_int_array_27, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del full_int_array_26, full_int_array_27, set_value__105 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__107 = paddle._C_ops.set_value_( + set_value__106, + full_int_array_24, + full_int_array_28, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del full_int_array_24, full_int_array_28, set_value__106 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__11 = paddle._C_ops.set_value_( + set_value__107, + full_int_array_2, + full_int_array_29, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del full_int_array_29, set_value__107 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_46 = [1, 1, 7, 1, 7, 1] + + # pd_op.reshape: (1x1x7x1x7x1xf32) <- (1x7x7x1xf32, 6xi64) + reshape_351 = paddle._C_ops.reshape(set_value__11, full_int_array_46) + del full_int_array_46 + + # pd_op.transpose: (1x1x1x7x7x1xf32) <- (1x1x7x1x7x1xf32) + transpose_157 = paddle._C_ops.transpose(reshape_351, [0, 1, 3, 2, 4, 5]) + del reshape_351 + + # pd_op.reshape: (1x7x7x1xf32) <- (1x1x1x7x7x1xf32, 4xi64) + reshape_352 = paddle._C_ops.reshape(transpose_157, full_int_array_31) + del full_int_array_31, transpose_157 + + # pd_op.reshape: (1x49xf32) <- (1x7x7x1xf32, 2xi64) + reshape_353 = paddle._C_ops.reshape(reshape_352, full_int_array_32) + del full_int_array_32, reshape_352 + + # pd_op.unsqueeze: (1x1x49xf32) <- (1x49xf32, 1xi64) + unsqueeze_70 = paddle._C_ops.unsqueeze(reshape_353, full_int_array_8) + + # pd_op.unsqueeze: (1x49x1xf32) <- (1x49xf32, 1xi64) + unsqueeze_71 = paddle._C_ops.unsqueeze(reshape_353, full_int_array_0) + del reshape_353 + + # pd_op.subtract: (1x49x49xf32) <- (1x1x49xf32, 1x49x1xf32) + subtract_11 = paddle._C_ops.subtract(unsqueeze_70, unsqueeze_71) + del unsqueeze_70, unsqueeze_71 + + # pd_op.not_equal: (1x49x49xb) <- (1x49x49xf32, xf32) + not_equal_11 = paddle._C_ops.not_equal(subtract_11, full_35) + + # pd_op.full: (1x49x49xf32) <- () + full_71 = paddle._C_ops.full( + [1, 49, 49], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (1x49x49xf32) <- (1x49x49xb, 1x49x49xf32, 1x49x49xf32) + where_22 = paddle._C_ops.where(not_equal_11, full_71, subtract_11) + del full_71, not_equal_11, subtract_11 + + # pd_op.equal: (1x49x49xb) <- (1x49x49xf32, xf32) + equal_11 = paddle._C_ops.equal(where_22, full_35) + del full_35 + + # pd_op.full: (1x49x49xf32) <- () + full_72 = paddle._C_ops.full( + [1, 49, 49], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (1x49x49xf32) <- (1x49x49xb, 1x49x49xf32, 1x49x49xf32) + where_23 = paddle._C_ops.where(equal_11, full_72, where_22) + del equal_11, full_72, where_22 + + # pd_op.shape64: (3xi64) <- (-1x49x1024xf32) + shape64_138 = paddle._C_ops.shape64(reshape_204) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_208 = paddle._C_ops.slice( + shape64_138, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_138 + + # pd_op.matmul: (-1x49x3072xf32) <- (-1x49x1024xf32, 1024x3072xf32) + matmul_118 = paddle._C_ops.matmul(reshape_204, parameter_13, False, False) + del parameter_13 + + # pd_op.add: (-1x49x3072xf32) <- (-1x49x3072xf32, 3072xf32) + add_161 = paddle._C_ops.add(matmul_118, parameter_12) + del parameter_12 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_195 = [slice_208, full_29, full_30, full_32, full_32] + del full_30 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_192 = paddle._C_ops.stack(combine_195, 0) + del combine_195 + + # pd_op.reshape: (-1x49x3x32x32xf32) <- (-1x49x3072xf32, 5xi64) + reshape_354 = paddle._C_ops.reshape(add_161, stack_192) + del stack_192 + + # pd_op.transpose: (3x-1x32x49x32xf32) <- (-1x49x3x32x32xf32) + transpose_140 = paddle._C_ops.transpose(reshape_354, [2, 0, 3, 1, 4]) + del reshape_354 + + # pd_op.slice: (-1x32x49x32xf32) <- (3x-1x32x49x32xf32, 1xi64, 1xi64) + slice_209 = paddle._C_ops.slice( + transpose_140, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x32x49x32xf32) <- (3x-1x32x49x32xf32, 1xi64, 1xi64) + slice_210 = paddle._C_ops.slice( + transpose_140, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x32x49x32xf32) <- (3x-1x32x49x32xf32, 1xi64, 1xi64) + slice_23 = paddle._C_ops.slice( + transpose_140, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x32x49x32xf32) <- (-1x32x49x32xf32, 1xf32) + scale_23 = paddle._C_ops.scale(slice_209, full_0, float("0"), True) + del slice_209 + + # pd_op.transpose: (-1x32x32x49xf32) <- (-1x32x49x32xf32) + transpose_141 = paddle._C_ops.transpose(slice_210, [0, 1, 3, 2]) + del slice_210 + + # pd_op.matmul: (-1x32x49x49xf32) <- (-1x32x49x32xf32, -1x32x32x49xf32) + matmul_119 = paddle._C_ops.matmul(scale_23, transpose_141, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_205 = paddle._C_ops.reshape(data_47, full_int_array_12) + del data_47, full_int_array_12 + + # pd_op.index_select: (2401x32xf32) <- (169x32xf32, 2401xi64) + index_select_23 = paddle._C_ops.index_select(data_48, reshape_205, 0) + del data_48 + + # pd_op.reshape: (49x49x32xf32) <- (2401x32xf32, 3xi64) + reshape_355 = paddle._C_ops.reshape(index_select_23, full_int_array_13) + del full_int_array_13 + + # pd_op.transpose: (32x49x49xf32) <- (49x49x32xf32) + transpose_142 = paddle._C_ops.transpose(reshape_355, [2, 0, 1]) + del reshape_355 + + # pd_op.unsqueeze: (1x32x49x49xf32) <- (32x49x49xf32, 1xi64) + unsqueeze_34 = paddle._C_ops.unsqueeze(transpose_142, full_int_array_7) + + # pd_op.add: (-1x32x49x49xf32) <- (-1x32x49x49xf32, 1x32x49x49xf32) + add_162 = paddle._C_ops.add(matmul_119, unsqueeze_34) + + # pd_op.full: (xi64) <- () + full_73 = paddle._C_ops.full( + [], float("1"), paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_11 = paddle._C_ops.floor_divide(slice_208, full_73) + del full_73 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_196 = [floor_divide_11, full_40, full_32, full_29, full_29] + del floor_divide_11 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_193 = paddle._C_ops.stack(combine_196, 0) + del combine_196 + + # pd_op.reshape: (-1x1x32x49x49xf32) <- (-1x32x49x49xf32, 5xi64) + reshape_206 = paddle._C_ops.reshape(add_162, stack_193) + del stack_193 + + # pd_op.unsqueeze: (1x1x49x49xf32) <- (1x49x49xf32, 1xi64) + unsqueeze_72 = paddle._C_ops.unsqueeze(where_23, full_int_array_8) + del where_23 + + # pd_op.unsqueeze: (1x1x1x49x49xf32) <- (1x1x49x49xf32, 1xi64) + unsqueeze_35 = paddle._C_ops.unsqueeze(unsqueeze_72, full_int_array_7) + del unsqueeze_72 + + # pd_op.add: (-1x1x32x49x49xf32) <- (-1x1x32x49x49xf32, 1x1x1x49x49xf32) + add_163 = paddle._C_ops.add(reshape_206, unsqueeze_35) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_197 = [slice_208, full_32, full_29, full_29] + del full_32 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_194 = paddle._C_ops.stack(combine_197, 0) + del combine_197 + + # pd_op.reshape: (-1x32x49x49xf32) <- (-1x1x32x49x49xf32, 4xi64) + reshape_356 = paddle._C_ops.reshape(add_163, stack_194) + del stack_194 + + # pd_op.softmax: (-1x32x49x49xf32) <- (-1x32x49x49xf32) + softmax_23 = paddle._C_ops.softmax(reshape_356, -1) + del reshape_356 + + # pd_op.matmul: (-1x32x49x32xf32) <- (-1x32x49x49xf32, -1x32x49x32xf32) + matmul_147 = paddle._C_ops.matmul(softmax_23, slice_23, False, False) + + # pd_op.transpose: (-1x49x32x32xf32) <- (-1x32x49x32xf32) + transpose_143 = paddle._C_ops.transpose(matmul_147, [0, 2, 1, 3]) + del matmul_147 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_198 = [slice_208, full_29, full_53] + del slice_208 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_195 = paddle._C_ops.stack(combine_198, 0) + del combine_198 + + # pd_op.reshape: (-1x49x1024xf32) <- (-1x49x32x32xf32, 3xi64) + reshape_207 = paddle._C_ops.reshape(transpose_143, stack_195) + del stack_195 + + # pd_op.matmul: (-1x49x1024xf32) <- (-1x49x1024xf32, 1024x1024xf32) + matmul_120 = paddle._C_ops.matmul(reshape_207, parameter_11, False, False) + del parameter_11 + + # pd_op.add: (-1x49x1024xf32) <- (-1x49x1024xf32, 1024xf32) + add_164 = paddle._C_ops.add(matmul_120, parameter_10) + del parameter_10 + + # pd_op.reshape: (-1x7x7x1024xf32) <- (-1x49x1024xf32, 4xi64) + reshape_208 = paddle._C_ops.reshape(add_164, full_int_array_43) + + # pd_op.reshape: (-1x1x1x7x7x1024xf32) <- (-1x7x7x1024xf32, 6xi64) + reshape_357 = paddle._C_ops.reshape(reshape_208, full_int_array_45) + del full_int_array_45 + + # pd_op.transpose: (-1x1x7x1x7x1024xf32) <- (-1x1x1x7x7x1024xf32) + transpose_144 = paddle._C_ops.transpose(reshape_357, [0, 1, 3, 2, 4, 5]) + del reshape_357 + + # pd_op.reshape: (-1x7x7x1024xf32) <- (-1x1x7x1x7x1024xf32, 4xi64) + reshape_209 = paddle._C_ops.reshape(transpose_144, full_int_array_43) + del full_int_array_43 + + # pd_op.roll: (-1x7x7x1024xf32) <- (-1x7x7x1024xf32, 2xi64) + roll_23 = paddle._C_ops.roll(reshape_209, full_int_array_3, [1, 2]) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_199 = [slice_205, full_29, full_53] + del full_29, full_53, slice_205 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_196 = paddle._C_ops.stack(combine_199, 0) + del combine_199 + + # pd_op.reshape: (-1x49x1024xf32) <- (-1x7x7x1024xf32, 3xi64) + reshape_210 = paddle._C_ops.reshape(roll_23, stack_196) + del stack_196 + + # pd_op.full: (xf32) <- () + full_24 = paddle._C_ops.full( + [], float("0.5"), paddle.float32, paddle.framework._current_expected_place() + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_268 = full_24 + + # pd_op.shape64: (3xi64) <- (-1x49x1024xf32) + shape64_139 = paddle._C_ops.shape64(reshape_210) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_211 = paddle._C_ops.slice( + shape64_139, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_139 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_200 = [slice_211, full_40, full_40] + del slice_211 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_197 = paddle._C_ops.stack(combine_200, 0) + del combine_200 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_44 = paddle._C_ops.uniform( + stack_197, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_197 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_226 = paddle._C_ops.add(full_24, uniform_44) + del uniform_44 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_44 = paddle._C_ops.floor(add_226) + del add_226 + + # pd_op.divide: (-1x49x1024xf32) <- (-1x49x1024xf32, xf32) + divide_44 = paddle._C_ops.divide(reshape_210, full_24) + + # pd_op.multiply: (-1x49x1024xf32) <- (-1x49x1024xf32, -1x1x1xf32) + multiply_44 = paddle._C_ops.multiply(divide_44, floor_44) + + # pd_op.add: (-1x49x1024xf32) <- (-1x49x1024xf32, -1x49x1024xf32) + add_165 = paddle._C_ops.add(add_160, multiply_44) + + # pd_op.layer_norm: (-1x49x1024xf32, -1x49xf32, -1x49xf32) <- (-1x49x1024xf32, 1024xf32, 1024xf32) + layer_norm_153, layer_norm_154, layer_norm_155 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_165, parameter_9, parameter_8, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_8, parameter_9 + + # pd_op.matmul: (-1x49x4096xf32) <- (-1x49x1024xf32, 1024x4096xf32) + matmul_121 = paddle._C_ops.matmul(layer_norm_153, parameter_7, False, False) + del parameter_7 + + # pd_op.add: (-1x49x4096xf32) <- (-1x49x4096xf32, 4096xf32) + add_166 = paddle._C_ops.add(matmul_121, parameter_6) + del parameter_6 + + # pd_op.gelu: (-1x49x4096xf32) <- (-1x49x4096xf32) + gelu_23 = paddle._C_ops.gelu(add_166, False) + + # pd_op.matmul: (-1x49x1024xf32) <- (-1x49x4096xf32, 4096x1024xf32) + matmul_122 = paddle._C_ops.matmul(gelu_23, parameter_5, False, False) + del parameter_5 + + # pd_op.add: (-1x49x1024xf32) <- (-1x49x1024xf32, 1024xf32) + add_167 = paddle._C_ops.add(matmul_122, parameter_4) + del parameter_4 + + # pd_op.shape64: (3xi64) <- (-1x49x1024xf32) + shape64_140 = paddle._C_ops.shape64(add_167) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_212 = paddle._C_ops.slice( + shape64_140, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del full_int_array_7, full_int_array_8, shape64_140 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_201 = [slice_212, full_40, full_40] + del full_40, slice_212 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_198 = paddle._C_ops.stack(combine_201, 0) + del combine_201 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_45 = paddle._C_ops.uniform( + stack_198, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del full_41, full_42, stack_198 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_227 = paddle._C_ops.add(full_24, uniform_45) + del uniform_45 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_45 = paddle._C_ops.floor(add_227) + del add_227 + + # pd_op.divide: (-1x49x1024xf32) <- (-1x49x1024xf32, xf32) + divide_45 = paddle._C_ops.divide(add_167, full_24) + + # pd_op.multiply: (-1x49x1024xf32) <- (-1x49x1024xf32, -1x1x1xf32) + multiply_45 = paddle._C_ops.multiply(divide_45, floor_45) + + # pd_op.add: (-1x49x1024xf32) <- (-1x49x1024xf32, -1x49x1024xf32) + add_168 = paddle._C_ops.add(add_165, multiply_45) + + # pd_op.layer_norm: (-1x49x1024xf32, -1x49xf32, -1x49xf32) <- (-1x49x1024xf32, 1024xf32, 1024xf32) + layer_norm_158, layer_norm_156, layer_norm_157 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_168, parameter_3, parameter_2, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_2, parameter_3 + + # pd_op.transpose: (-1x1024x49xf32) <- (-1x49x1024xf32) + transpose_145 = paddle._C_ops.transpose(layer_norm_158, [0, 2, 1]) + del layer_norm_158 + + # pd_op.unsqueeze: (-1x1024x1x49xf32) <- (-1x1024x49xf32, 1xi64) + unsqueeze_36 = paddle._C_ops.unsqueeze(transpose_145, full_int_array_0) + + # pd_op.pool2d: (-1x1024x1x1xf32) <- (-1x1024x1x49xf32, 2xi64) + pool2d_0 = paddle._C_ops.pool2d( + unsqueeze_36, + full_int_array_18, + [1, 1], + [0, 0], + False, + True, + "NCHW", + "avg", + False, + True, + "EXPLICIT", + ) + del full_int_array_18 + + # pd_op.squeeze: (-1x1024x1xf32) <- (-1x1024x1x1xf32, 1xi64) + squeeze_0 = paddle._C_ops.squeeze(pool2d_0, full_int_array_0) + + # pd_op.flatten: (-1x1024xf32) <- (-1x1024x1xf32) + flatten_0 = paddle._C_ops.flatten(squeeze_0, 1, 2) + + # pd_op.matmul: (-1x102xf32) <- (-1x1024xf32, 1024x102xf32) + matmul_123 = paddle._C_ops.matmul(flatten_0, parameter_1, False, False) + del parameter_1 + + # pd_op.add: (-1x102xf32) <- (-1x102xf32, 102xf32) + add_169 = paddle._C_ops.add(matmul_123, parameter_0) + del ( + assign_0, + assign_1, + assign_10, + assign_100, + assign_101, + assign_103, + assign_105, + assign_106, + assign_107, + assign_108, + assign_109, + assign_110, + assign_111, + assign_113, + assign_114, + assign_116, + assign_117, + assign_118, + assign_119, + assign_12, + assign_120, + assign_121, + assign_123, + assign_125, + assign_126, + assign_127, + assign_128, + assign_129, + assign_130, + assign_131, + assign_133, + assign_134, + assign_136, + assign_137, + assign_138, + assign_139, + assign_14, + assign_140, + assign_141, + assign_143, + assign_145, + assign_146, + assign_147, + assign_148, + assign_149, + assign_15, + assign_150, + assign_151, + assign_153, + assign_154, + assign_156, + assign_157, + assign_158, + assign_159, + assign_16, + assign_160, + assign_161, + assign_163, + assign_165, + assign_166, + assign_167, + assign_168, + assign_169, + assign_17, + assign_170, + assign_171, + assign_173, + assign_174, + assign_176, + assign_177, + assign_178, + assign_179, + assign_18, + assign_180, + assign_181, + assign_183, + assign_185, + assign_186, + assign_187, + assign_188, + assign_189, + assign_19, + assign_190, + assign_191, + assign_193, + assign_194, + assign_196, + assign_197, + assign_198, + assign_199, + assign_2, + assign_20, + assign_200, + assign_201, + assign_203, + assign_205, + assign_206, + assign_207, + assign_208, + assign_209, + assign_21, + assign_210, + assign_211, + assign_213, + assign_214, + assign_216, + assign_217, + assign_218, + assign_219, + assign_22, + assign_220, + assign_221, + assign_223, + assign_225, + assign_226, + assign_227, + assign_228, + assign_229, + assign_23, + assign_230, + assign_231, + assign_233, + assign_234, + assign_236, + assign_237, + assign_238, + assign_239, + assign_24, + assign_240, + assign_241, + assign_242, + assign_243, + assign_244, + assign_245, + assign_246, + assign_247, + assign_249, + assign_25, + assign_250, + assign_251, + assign_252, + assign_253, + assign_254, + assign_256, + assign_258, + assign_259, + assign_26, + assign_260, + assign_261, + assign_262, + assign_263, + assign_264, + assign_266, + assign_267, + assign_269, + assign_27, + assign_270, + assign_271, + assign_28, + assign_3, + assign_30, + assign_32, + assign_33, + assign_34, + assign_35, + assign_36, + assign_37, + assign_38, + assign_4, + assign_40, + assign_41, + assign_43, + assign_44, + assign_45, + assign_46, + assign_47, + assign_48, + assign_49, + assign_5, + assign_50, + assign_51, + assign_52, + assign_53, + assign_54, + assign_56, + assign_57, + assign_58, + assign_59, + assign_6, + assign_60, + assign_61, + assign_63, + assign_65, + assign_66, + assign_67, + assign_68, + assign_69, + assign_7, + assign_70, + assign_71, + assign_73, + assign_74, + assign_76, + assign_77, + assign_78, + assign_79, + assign_8, + assign_80, + assign_81, + assign_83, + assign_85, + assign_86, + assign_87, + assign_88, + assign_89, + assign_9, + assign_90, + assign_91, + assign_93, + assign_94, + assign_96, + assign_97, + assign_98, + assign_99, + full_int_array_0, + full_int_array_1, + full_int_array_2, + full_int_array_3, + full_int_array_4, + full_int_array_5, + full_int_array_6, + parameter_0, + ) + + return ( + conv2d_0, + reshape_0, + add_0, + transpose_0, + layer_norm_0, + layer_norm_1, + layer_norm_2, + layer_norm_3, + layer_norm_4, + layer_norm_5, + reshape_1, + transpose_1, + reshape_2, + reshape_3, + matmul_0, + add_1, + transpose_2, + slice_0, + full_0, + scale_0, + transpose_3, + matmul_1, + reshape_4, + index_select_0, + transpose_4, + unsqueeze_0, + softmax_0, + transpose_5, + reshape_5, + matmul_2, + add_2, + reshape_6, + transpose_6, + reshape_7, + reshape_8, + add_3, + layer_norm_6, + layer_norm_7, + layer_norm_8, + matmul_3, + add_4, + gelu_0, + matmul_4, + add_5, + add_6, + layer_norm_9, + layer_norm_10, + layer_norm_11, + reshape_9, + roll_0, + transpose_7, + reshape_10, + reshape_11, + matmul_5, + add_7, + transpose_8, + slice_1, + assign_11, + scale_1, + transpose_9, + matmul_6, + reshape_12, + index_select_1, + transpose_10, + unsqueeze_1, + add_8, + reshape_13, + unsqueeze_2, + add_9, + softmax_1, + transpose_11, + reshape_14, + matmul_7, + add_10, + reshape_15, + transpose_12, + reshape_16, + roll_1, + reshape_17, + full_1, + floor_0, + divide_0, + multiply_0, + add_11, + layer_norm_12, + layer_norm_13, + layer_norm_14, + matmul_8, + add_12, + gelu_1, + matmul_9, + add_13, + assign_13, + floor_1, + divide_1, + multiply_1, + add_14, + reshape_18, + strided_slice_0, + strided_slice_1, + strided_slice_2, + strided_slice_3, + full_2, + concat_0, + reshape_19, + layer_norm_15, + layer_norm_16, + layer_norm_17, + matmul_10, + layer_norm_18, + layer_norm_19, + layer_norm_20, + reshape_20, + transpose_13, + reshape_21, + reshape_22, + matmul_11, + add_15, + transpose_14, + slice_2, + assign_29, + scale_2, + transpose_15, + matmul_12, + reshape_23, + index_select_2, + transpose_16, + unsqueeze_3, + softmax_2, + transpose_17, + reshape_24, + matmul_13, + add_16, + reshape_25, + transpose_18, + reshape_26, + reshape_27, + full_3, + floor_2, + divide_2, + multiply_2, + add_17, + layer_norm_21, + layer_norm_22, + layer_norm_23, + matmul_14, + add_18, + gelu_2, + matmul_15, + add_19, + assign_31, + floor_3, + divide_3, + multiply_3, + add_20, + layer_norm_24, + layer_norm_25, + layer_norm_26, + reshape_28, + roll_2, + transpose_19, + reshape_29, + reshape_30, + matmul_16, + add_21, + transpose_20, + slice_3, + assign_39, + scale_3, + transpose_21, + matmul_17, + reshape_31, + index_select_3, + transpose_22, + unsqueeze_4, + add_22, + reshape_32, + unsqueeze_5, + add_23, + softmax_3, + transpose_23, + reshape_33, + matmul_18, + add_24, + reshape_34, + transpose_24, + reshape_35, + roll_3, + reshape_36, + full_4, + floor_4, + divide_4, + multiply_4, + add_25, + layer_norm_27, + layer_norm_28, + layer_norm_29, + matmul_19, + add_26, + gelu_3, + matmul_20, + add_27, + assign_42, + floor_5, + divide_5, + multiply_5, + add_28, + reshape_37, + strided_slice_4, + strided_slice_5, + strided_slice_6, + strided_slice_7, + assign_55, + concat_1, + reshape_38, + layer_norm_30, + layer_norm_31, + layer_norm_32, + matmul_21, + layer_norm_33, + layer_norm_34, + layer_norm_35, + reshape_39, + transpose_25, + reshape_40, + reshape_41, + matmul_22, + add_29, + transpose_26, + slice_4, + assign_62, + scale_4, + transpose_27, + matmul_23, + reshape_42, + index_select_4, + transpose_28, + unsqueeze_6, + softmax_4, + transpose_29, + reshape_43, + matmul_24, + add_30, + reshape_44, + transpose_30, + reshape_45, + reshape_46, + full_5, + floor_6, + divide_6, + multiply_6, + add_31, + layer_norm_36, + layer_norm_37, + layer_norm_38, + matmul_25, + add_32, + gelu_4, + matmul_26, + add_33, + assign_64, + floor_7, + divide_7, + multiply_7, + add_34, + layer_norm_39, + layer_norm_40, + layer_norm_41, + reshape_47, + roll_4, + transpose_31, + reshape_48, + reshape_49, + matmul_27, + add_35, + transpose_32, + slice_5, + assign_72, + scale_5, + transpose_33, + matmul_28, + reshape_50, + index_select_5, + transpose_34, + unsqueeze_7, + add_36, + reshape_51, + unsqueeze_8, + add_37, + softmax_5, + transpose_35, + reshape_52, + matmul_29, + add_38, + reshape_53, + transpose_36, + reshape_54, + roll_5, + reshape_55, + full_6, + floor_8, + divide_8, + multiply_8, + add_39, + layer_norm_42, + layer_norm_43, + layer_norm_44, + matmul_30, + add_40, + gelu_5, + matmul_31, + add_41, + assign_75, + floor_9, + divide_9, + multiply_9, + add_42, + layer_norm_45, + layer_norm_46, + layer_norm_47, + reshape_56, + transpose_37, + reshape_57, + reshape_58, + matmul_32, + add_43, + transpose_38, + slice_6, + assign_82, + scale_6, + transpose_39, + matmul_33, + reshape_59, + index_select_6, + transpose_40, + unsqueeze_9, + softmax_6, + transpose_41, + reshape_60, + matmul_34, + add_44, + reshape_61, + transpose_42, + reshape_62, + reshape_63, + full_7, + floor_10, + divide_10, + multiply_10, + add_45, + layer_norm_48, + layer_norm_49, + layer_norm_50, + matmul_35, + add_46, + gelu_6, + matmul_36, + add_47, + assign_84, + floor_11, + divide_11, + multiply_11, + add_48, + layer_norm_51, + layer_norm_52, + layer_norm_53, + reshape_64, + roll_6, + transpose_43, + reshape_65, + reshape_66, + matmul_37, + add_49, + transpose_44, + slice_7, + assign_92, + scale_7, + transpose_45, + matmul_38, + reshape_67, + index_select_7, + transpose_46, + unsqueeze_10, + add_50, + reshape_68, + unsqueeze_11, + add_51, + softmax_7, + transpose_47, + reshape_69, + matmul_39, + add_52, + reshape_70, + transpose_48, + reshape_71, + roll_7, + reshape_72, + full_8, + floor_12, + divide_12, + multiply_12, + add_53, + layer_norm_54, + layer_norm_55, + layer_norm_56, + matmul_40, + add_54, + gelu_7, + matmul_41, + add_55, + assign_95, + floor_13, + divide_13, + multiply_13, + add_56, + layer_norm_57, + layer_norm_58, + layer_norm_59, + reshape_73, + transpose_49, + reshape_74, + reshape_75, + matmul_42, + add_57, + transpose_50, + slice_8, + assign_102, + scale_8, + transpose_51, + matmul_43, + reshape_76, + index_select_8, + transpose_52, + unsqueeze_12, + softmax_8, + transpose_53, + reshape_77, + matmul_44, + add_58, + reshape_78, + transpose_54, + reshape_79, + reshape_80, + full_9, + floor_14, + divide_14, + multiply_14, + add_59, + layer_norm_60, + layer_norm_61, + layer_norm_62, + matmul_45, + add_60, + gelu_8, + matmul_46, + add_61, + assign_104, + floor_15, + divide_15, + multiply_15, + add_62, + layer_norm_63, + layer_norm_64, + layer_norm_65, + reshape_81, + roll_8, + transpose_55, + reshape_82, + reshape_83, + matmul_47, + add_63, + transpose_56, + slice_9, + assign_112, + scale_9, + transpose_57, + matmul_48, + reshape_84, + index_select_9, + transpose_58, + unsqueeze_13, + add_64, + reshape_85, + unsqueeze_14, + add_65, + softmax_9, + transpose_59, + reshape_86, + matmul_49, + add_66, + reshape_87, + transpose_60, + reshape_88, + roll_9, + reshape_89, + full_10, + floor_16, + divide_16, + multiply_16, + add_67, + layer_norm_66, + layer_norm_67, + layer_norm_68, + matmul_50, + add_68, + gelu_9, + matmul_51, + add_69, + assign_115, + floor_17, + divide_17, + multiply_17, + add_70, + layer_norm_69, + layer_norm_70, + layer_norm_71, + reshape_90, + transpose_61, + reshape_91, + reshape_92, + matmul_52, + add_71, + transpose_62, + slice_10, + assign_122, + scale_10, + transpose_63, + matmul_53, + reshape_93, + index_select_10, + transpose_64, + unsqueeze_15, + softmax_10, + transpose_65, + reshape_94, + matmul_54, + add_72, + reshape_95, + transpose_66, + reshape_96, + reshape_97, + full_11, + floor_18, + divide_18, + multiply_18, + add_73, + layer_norm_72, + layer_norm_73, + layer_norm_74, + matmul_55, + add_74, + gelu_10, + matmul_56, + add_75, + assign_124, + floor_19, + divide_19, + multiply_19, + add_76, + layer_norm_75, + layer_norm_76, + layer_norm_77, + reshape_98, + roll_10, + transpose_67, + reshape_99, + reshape_100, + matmul_57, + add_77, + transpose_68, + slice_11, + assign_132, + scale_11, + transpose_69, + matmul_58, + reshape_101, + index_select_11, + transpose_70, + unsqueeze_16, + add_78, + reshape_102, + unsqueeze_17, + add_79, + softmax_11, + transpose_71, + reshape_103, + matmul_59, + add_80, + reshape_104, + transpose_72, + reshape_105, + roll_11, + reshape_106, + full_12, + floor_20, + divide_20, + multiply_20, + add_81, + layer_norm_78, + layer_norm_79, + layer_norm_80, + matmul_60, + add_82, + gelu_11, + matmul_61, + add_83, + assign_135, + floor_21, + divide_21, + multiply_21, + add_84, + layer_norm_81, + layer_norm_82, + layer_norm_83, + reshape_107, + transpose_73, + reshape_108, + reshape_109, + matmul_62, + add_85, + transpose_74, + slice_12, + assign_142, + scale_12, + transpose_75, + matmul_63, + reshape_110, + index_select_12, + transpose_76, + unsqueeze_18, + softmax_12, + transpose_77, + reshape_111, + matmul_64, + add_86, + reshape_112, + transpose_78, + reshape_113, + reshape_114, + full_13, + floor_22, + divide_22, + multiply_22, + add_87, + layer_norm_84, + layer_norm_85, + layer_norm_86, + matmul_65, + add_88, + gelu_12, + matmul_66, + add_89, + assign_144, + floor_23, + divide_23, + multiply_23, + add_90, + layer_norm_87, + layer_norm_88, + layer_norm_89, + reshape_115, + roll_12, + transpose_79, + reshape_116, + reshape_117, + matmul_67, + add_91, + transpose_80, + slice_13, + assign_152, + scale_13, + transpose_81, + matmul_68, + reshape_118, + index_select_13, + transpose_82, + unsqueeze_19, + add_92, + reshape_119, + unsqueeze_20, + add_93, + softmax_13, + transpose_83, + reshape_120, + matmul_69, + add_94, + reshape_121, + transpose_84, + reshape_122, + roll_13, + reshape_123, + full_14, + floor_24, + divide_24, + multiply_24, + add_95, + layer_norm_90, + layer_norm_91, + layer_norm_92, + matmul_70, + add_96, + gelu_13, + matmul_71, + add_97, + assign_155, + floor_25, + divide_25, + multiply_25, + add_98, + layer_norm_93, + layer_norm_94, + layer_norm_95, + reshape_124, + transpose_85, + reshape_125, + reshape_126, + matmul_72, + add_99, + transpose_86, + slice_14, + assign_162, + scale_14, + transpose_87, + matmul_73, + reshape_127, + index_select_14, + transpose_88, + unsqueeze_21, + softmax_14, + transpose_89, + reshape_128, + matmul_74, + add_100, + reshape_129, + transpose_90, + reshape_130, + reshape_131, + full_15, + floor_26, + divide_26, + multiply_26, + add_101, + layer_norm_96, + layer_norm_97, + layer_norm_98, + matmul_75, + add_102, + gelu_14, + matmul_76, + add_103, + assign_164, + floor_27, + divide_27, + multiply_27, + add_104, + layer_norm_99, + layer_norm_100, + layer_norm_101, + reshape_132, + roll_14, + transpose_91, + reshape_133, + reshape_134, + matmul_77, + add_105, + transpose_92, + slice_15, + assign_172, + scale_15, + transpose_93, + matmul_78, + reshape_135, + index_select_15, + transpose_94, + unsqueeze_22, + add_106, + reshape_136, + unsqueeze_23, + add_107, + softmax_15, + transpose_95, + reshape_137, + matmul_79, + add_108, + reshape_138, + transpose_96, + reshape_139, + roll_15, + reshape_140, + full_16, + floor_28, + divide_28, + multiply_28, + add_109, + layer_norm_102, + layer_norm_103, + layer_norm_104, + matmul_80, + add_110, + gelu_15, + matmul_81, + add_111, + assign_175, + floor_29, + divide_29, + multiply_29, + add_112, + layer_norm_105, + layer_norm_106, + layer_norm_107, + reshape_141, + transpose_97, + reshape_142, + reshape_143, + matmul_82, + add_113, + transpose_98, + slice_16, + assign_182, + scale_16, + transpose_99, + matmul_83, + reshape_144, + index_select_16, + transpose_100, + unsqueeze_24, + softmax_16, + transpose_101, + reshape_145, + matmul_84, + add_114, + reshape_146, + transpose_102, + reshape_147, + reshape_148, + full_17, + floor_30, + divide_30, + multiply_30, + add_115, + layer_norm_108, + layer_norm_109, + layer_norm_110, + matmul_85, + add_116, + gelu_16, + matmul_86, + add_117, + assign_184, + floor_31, + divide_31, + multiply_31, + add_118, + layer_norm_111, + layer_norm_112, + layer_norm_113, + reshape_149, + roll_16, + transpose_103, + reshape_150, + reshape_151, + matmul_87, + add_119, + transpose_104, + slice_17, + assign_192, + scale_17, + transpose_105, + matmul_88, + reshape_152, + index_select_17, + transpose_106, + unsqueeze_25, + add_120, + reshape_153, + unsqueeze_26, + add_121, + softmax_17, + transpose_107, + reshape_154, + matmul_89, + add_122, + reshape_155, + transpose_108, + reshape_156, + roll_17, + reshape_157, + full_18, + floor_32, + divide_32, + multiply_32, + add_123, + layer_norm_114, + layer_norm_115, + layer_norm_116, + matmul_90, + add_124, + gelu_17, + matmul_91, + add_125, + assign_195, + floor_33, + divide_33, + multiply_33, + add_126, + layer_norm_117, + layer_norm_118, + layer_norm_119, + reshape_158, + transpose_109, + reshape_159, + reshape_160, + matmul_92, + add_127, + transpose_110, + slice_18, + assign_202, + scale_18, + transpose_111, + matmul_93, + reshape_161, + index_select_18, + transpose_112, + unsqueeze_27, + softmax_18, + transpose_113, + reshape_162, + matmul_94, + add_128, + reshape_163, + transpose_114, + reshape_164, + reshape_165, + full_19, + floor_34, + divide_34, + multiply_34, + add_129, + layer_norm_120, + layer_norm_121, + layer_norm_122, + matmul_95, + add_130, + gelu_18, + matmul_96, + add_131, + assign_204, + floor_35, + divide_35, + multiply_35, + add_132, + layer_norm_123, + layer_norm_124, + layer_norm_125, + reshape_166, + roll_18, + transpose_115, + reshape_167, + reshape_168, + matmul_97, + add_133, + transpose_116, + slice_19, + assign_212, + scale_19, + transpose_117, + matmul_98, + reshape_169, + index_select_19, + transpose_118, + unsqueeze_28, + add_134, + reshape_170, + unsqueeze_29, + add_135, + softmax_19, + transpose_119, + reshape_171, + matmul_99, + add_136, + reshape_172, + transpose_120, + reshape_173, + roll_19, + reshape_174, + full_20, + floor_36, + divide_36, + multiply_36, + add_137, + layer_norm_126, + layer_norm_127, + layer_norm_128, + matmul_100, + add_138, + gelu_19, + matmul_101, + add_139, + assign_215, + floor_37, + divide_37, + multiply_37, + add_140, + layer_norm_129, + layer_norm_130, + layer_norm_131, + reshape_175, + transpose_121, + reshape_176, + reshape_177, + matmul_102, + add_141, + transpose_122, + slice_20, + assign_222, + scale_20, + transpose_123, + matmul_103, + reshape_178, + index_select_20, + transpose_124, + unsqueeze_30, + softmax_20, + transpose_125, + reshape_179, + matmul_104, + add_142, + reshape_180, + transpose_126, + reshape_181, + reshape_182, + full_21, + floor_38, + divide_38, + multiply_38, + add_143, + layer_norm_132, + layer_norm_133, + layer_norm_134, + matmul_105, + add_144, + gelu_20, + matmul_106, + add_145, + assign_224, + floor_39, + divide_39, + multiply_39, + add_146, + layer_norm_135, + layer_norm_136, + layer_norm_137, + reshape_183, + roll_20, + transpose_127, + reshape_184, + reshape_185, + matmul_107, + add_147, + transpose_128, + slice_21, + assign_232, + scale_21, + transpose_129, + matmul_108, + reshape_186, + index_select_21, + transpose_130, + unsqueeze_31, + add_148, + reshape_187, + unsqueeze_32, + add_149, + softmax_21, + transpose_131, + reshape_188, + matmul_109, + add_150, + reshape_189, + transpose_132, + reshape_190, + roll_21, + reshape_191, + full_22, + floor_40, + divide_40, + multiply_40, + add_151, + layer_norm_138, + layer_norm_139, + layer_norm_140, + matmul_110, + add_152, + gelu_21, + matmul_111, + add_153, + assign_235, + floor_41, + divide_41, + multiply_41, + add_154, + reshape_192, + strided_slice_8, + strided_slice_9, + strided_slice_10, + strided_slice_11, + assign_248, + concat_2, + reshape_193, + layer_norm_141, + layer_norm_142, + layer_norm_143, + matmul_112, + layer_norm_144, + layer_norm_145, + layer_norm_146, + reshape_194, + transpose_133, + reshape_195, + reshape_196, + matmul_113, + add_155, + transpose_134, + slice_22, + assign_255, + scale_22, + transpose_135, + matmul_114, + reshape_197, + index_select_22, + transpose_136, + unsqueeze_33, + softmax_22, + transpose_137, + reshape_198, + matmul_115, + add_156, + reshape_199, + transpose_138, + reshape_200, + reshape_201, + full_23, + floor_42, + divide_42, + multiply_42, + add_157, + layer_norm_147, + layer_norm_148, + layer_norm_149, + matmul_116, + add_158, + gelu_22, + matmul_117, + add_159, + assign_257, + floor_43, + divide_43, + multiply_43, + add_160, + layer_norm_150, + layer_norm_151, + layer_norm_152, + reshape_202, + roll_22, + transpose_139, + reshape_203, + reshape_204, + matmul_118, + add_161, + transpose_140, + slice_23, + assign_265, + scale_23, + transpose_141, + matmul_119, + reshape_205, + index_select_23, + transpose_142, + unsqueeze_34, + add_162, + reshape_206, + unsqueeze_35, + add_163, + softmax_23, + transpose_143, + reshape_207, + matmul_120, + add_164, + reshape_208, + transpose_144, + reshape_209, + roll_23, + reshape_210, + full_24, + floor_44, + divide_44, + multiply_44, + add_165, + layer_norm_153, + layer_norm_154, + layer_norm_155, + matmul_121, + add_166, + gelu_23, + matmul_122, + add_167, + assign_268, + floor_45, + divide_45, + multiply_45, + add_168, + layer_norm_156, + layer_norm_157, + transpose_145, + unsqueeze_36, + pool2d_0, + squeeze_0, + flatten_0, + matmul_123, + add_169, + set_value__0, + set_value__1, + set_value__2, + set_value__3, + set_value__4, + set_value__5, + set_value__6, + set_value__7, + set_value__8, + set_value__9, + set_value__10, + set_value__11, + ) diff --git a/paddle_samples/vision-model/SwinTransformer_base_patch4_window7_224/subgraph_0/weight_meta.py b/paddle_samples/vision-model/SwinTransformer_base_patch4_window7_224/subgraph_0/weight_meta.py new file mode 100644 index 00000000..69d1deec --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_base_patch4_window7_224/subgraph_0/weight_meta.py @@ -0,0 +1,2743 @@ +class Program_weight_tensor_parameter_0: + name = "parameter_0" + shape = [102] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_1: + name = "parameter_1" + shape = [1024, 102] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_2: + name = "parameter_2" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_3: + name = "parameter_3" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_4: + name = "parameter_4" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_5: + name = "parameter_5" + shape = [4096, 1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_6: + name = "parameter_6" + shape = [4096] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_7: + name = "parameter_7" + shape = [1024, 4096] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_8: + name = "parameter_8" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_9: + name = "parameter_9" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_10: + name = "parameter_10" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_11: + name = "parameter_11" + shape = [1024, 1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_12: + name = "parameter_12" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_13: + name = "parameter_13" + shape = [1024, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_14: + name = "parameter_14" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_15: + name = "parameter_15" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_16: + name = "parameter_16" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_17: + name = "parameter_17" + shape = [4096, 1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_18: + name = "parameter_18" + shape = [4096] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_19: + name = "parameter_19" + shape = [1024, 4096] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_20: + name = "parameter_20" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_21: + name = "parameter_21" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_22: + name = "parameter_22" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_23: + name = "parameter_23" + shape = [1024, 1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_24: + name = "parameter_24" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_25: + name = "parameter_25" + shape = [1024, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_26: + name = "parameter_26" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_27: + name = "parameter_27" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_28: + name = "parameter_28" + shape = [2048, 1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_29: + name = "parameter_29" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_30: + name = "parameter_30" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_31: + name = "parameter_31" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_32: + name = "parameter_32" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_33: + name = "parameter_33" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_34: + name = "parameter_34" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_35: + name = "parameter_35" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_36: + name = "parameter_36" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_37: + name = "parameter_37" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_38: + name = "parameter_38" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_39: + name = "parameter_39" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_40: + name = "parameter_40" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_41: + name = "parameter_41" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_42: + name = "parameter_42" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_43: + name = "parameter_43" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_44: + name = "parameter_44" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_45: + name = "parameter_45" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_46: + name = "parameter_46" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_47: + name = "parameter_47" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_48: + name = "parameter_48" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_49: + name = "parameter_49" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_50: + name = "parameter_50" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_51: + name = "parameter_51" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_52: + name = "parameter_52" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_53: + name = "parameter_53" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_54: + name = "parameter_54" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_55: + name = "parameter_55" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_56: + name = "parameter_56" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_57: + name = "parameter_57" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_58: + name = "parameter_58" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_59: + name = "parameter_59" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_60: + name = "parameter_60" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_61: + name = "parameter_61" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_62: + name = "parameter_62" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_63: + name = "parameter_63" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_64: + name = "parameter_64" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_65: + name = "parameter_65" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_66: + name = "parameter_66" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_67: + name = "parameter_67" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_68: + name = "parameter_68" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_69: + name = "parameter_69" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_70: + name = "parameter_70" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_71: + name = "parameter_71" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_72: + name = "parameter_72" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_73: + name = "parameter_73" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_74: + name = "parameter_74" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_75: + name = "parameter_75" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_76: + name = "parameter_76" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_77: + name = "parameter_77" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_78: + name = "parameter_78" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_79: + name = "parameter_79" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_80: + name = "parameter_80" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_81: + name = "parameter_81" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_82: + name = "parameter_82" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_83: + name = "parameter_83" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_84: + name = "parameter_84" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_85: + name = "parameter_85" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_86: + name = "parameter_86" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_87: + name = "parameter_87" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_88: + name = "parameter_88" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_89: + name = "parameter_89" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_90: + name = "parameter_90" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_91: + name = "parameter_91" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_92: + name = "parameter_92" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_93: + name = "parameter_93" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_94: + name = "parameter_94" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_95: + name = "parameter_95" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_96: + name = "parameter_96" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_97: + name = "parameter_97" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_98: + name = "parameter_98" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_99: + name = "parameter_99" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_100: + name = "parameter_100" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_101: + name = "parameter_101" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_102: + name = "parameter_102" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_103: + name = "parameter_103" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_104: + name = "parameter_104" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_105: + name = "parameter_105" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_106: + name = "parameter_106" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_107: + name = "parameter_107" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_108: + name = "parameter_108" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_109: + name = "parameter_109" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_110: + name = "parameter_110" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_111: + name = "parameter_111" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_112: + name = "parameter_112" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_113: + name = "parameter_113" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_114: + name = "parameter_114" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_115: + name = "parameter_115" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_116: + name = "parameter_116" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_117: + name = "parameter_117" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_118: + name = "parameter_118" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_119: + name = "parameter_119" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_120: + name = "parameter_120" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_121: + name = "parameter_121" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_122: + name = "parameter_122" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_123: + name = "parameter_123" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_124: + name = "parameter_124" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_125: + name = "parameter_125" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_126: + name = "parameter_126" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_127: + name = "parameter_127" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_128: + name = "parameter_128" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_129: + name = "parameter_129" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_130: + name = "parameter_130" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_131: + name = "parameter_131" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_132: + name = "parameter_132" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_133: + name = "parameter_133" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_134: + name = "parameter_134" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_135: + name = "parameter_135" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_136: + name = "parameter_136" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_137: + name = "parameter_137" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_138: + name = "parameter_138" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_139: + name = "parameter_139" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_140: + name = "parameter_140" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_141: + name = "parameter_141" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_142: + name = "parameter_142" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_143: + name = "parameter_143" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_144: + name = "parameter_144" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_145: + name = "parameter_145" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_146: + name = "parameter_146" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_147: + name = "parameter_147" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_148: + name = "parameter_148" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_149: + name = "parameter_149" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_150: + name = "parameter_150" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_151: + name = "parameter_151" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_152: + name = "parameter_152" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_153: + name = "parameter_153" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_154: + name = "parameter_154" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_155: + name = "parameter_155" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_156: + name = "parameter_156" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_157: + name = "parameter_157" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_158: + name = "parameter_158" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_159: + name = "parameter_159" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_160: + name = "parameter_160" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_161: + name = "parameter_161" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_162: + name = "parameter_162" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_163: + name = "parameter_163" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_164: + name = "parameter_164" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_165: + name = "parameter_165" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_166: + name = "parameter_166" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_167: + name = "parameter_167" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_168: + name = "parameter_168" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_169: + name = "parameter_169" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_170: + name = "parameter_170" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_171: + name = "parameter_171" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_172: + name = "parameter_172" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_173: + name = "parameter_173" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_174: + name = "parameter_174" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_175: + name = "parameter_175" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_176: + name = "parameter_176" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_177: + name = "parameter_177" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_178: + name = "parameter_178" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_179: + name = "parameter_179" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_180: + name = "parameter_180" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_181: + name = "parameter_181" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_182: + name = "parameter_182" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_183: + name = "parameter_183" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_184: + name = "parameter_184" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_185: + name = "parameter_185" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_186: + name = "parameter_186" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_187: + name = "parameter_187" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_188: + name = "parameter_188" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_189: + name = "parameter_189" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_190: + name = "parameter_190" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_191: + name = "parameter_191" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_192: + name = "parameter_192" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_193: + name = "parameter_193" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_194: + name = "parameter_194" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_195: + name = "parameter_195" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_196: + name = "parameter_196" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_197: + name = "parameter_197" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_198: + name = "parameter_198" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_199: + name = "parameter_199" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_200: + name = "parameter_200" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_201: + name = "parameter_201" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_202: + name = "parameter_202" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_203: + name = "parameter_203" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_204: + name = "parameter_204" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_205: + name = "parameter_205" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_206: + name = "parameter_206" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_207: + name = "parameter_207" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_208: + name = "parameter_208" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_209: + name = "parameter_209" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_210: + name = "parameter_210" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_211: + name = "parameter_211" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_212: + name = "parameter_212" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_213: + name = "parameter_213" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_214: + name = "parameter_214" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_215: + name = "parameter_215" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_216: + name = "parameter_216" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_217: + name = "parameter_217" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_218: + name = "parameter_218" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_219: + name = "parameter_219" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_220: + name = "parameter_220" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_221: + name = "parameter_221" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_222: + name = "parameter_222" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_223: + name = "parameter_223" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_224: + name = "parameter_224" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_225: + name = "parameter_225" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_226: + name = "parameter_226" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_227: + name = "parameter_227" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_228: + name = "parameter_228" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_229: + name = "parameter_229" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_230: + name = "parameter_230" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_231: + name = "parameter_231" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_232: + name = "parameter_232" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_233: + name = "parameter_233" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_234: + name = "parameter_234" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_235: + name = "parameter_235" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_236: + name = "parameter_236" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_237: + name = "parameter_237" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_238: + name = "parameter_238" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_239: + name = "parameter_239" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_240: + name = "parameter_240" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_241: + name = "parameter_241" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_242: + name = "parameter_242" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_243: + name = "parameter_243" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_244: + name = "parameter_244" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_245: + name = "parameter_245" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_246: + name = "parameter_246" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_247: + name = "parameter_247" + shape = [1024, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_248: + name = "parameter_248" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_249: + name = "parameter_249" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_250: + name = "parameter_250" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_251: + name = "parameter_251" + shape = [1024, 256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_252: + name = "parameter_252" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_253: + name = "parameter_253" + shape = [256, 1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_254: + name = "parameter_254" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_255: + name = "parameter_255" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_256: + name = "parameter_256" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_257: + name = "parameter_257" + shape = [256, 256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_258: + name = "parameter_258" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_259: + name = "parameter_259" + shape = [256, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_260: + name = "parameter_260" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_261: + name = "parameter_261" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_262: + name = "parameter_262" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_263: + name = "parameter_263" + shape = [1024, 256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_264: + name = "parameter_264" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_265: + name = "parameter_265" + shape = [256, 1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_266: + name = "parameter_266" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_267: + name = "parameter_267" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_268: + name = "parameter_268" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_269: + name = "parameter_269" + shape = [256, 256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_270: + name = "parameter_270" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_271: + name = "parameter_271" + shape = [256, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_272: + name = "parameter_272" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_273: + name = "parameter_273" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_274: + name = "parameter_274" + shape = [512, 256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_275: + name = "parameter_275" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_276: + name = "parameter_276" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_277: + name = "parameter_277" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_278: + name = "parameter_278" + shape = [512, 128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_279: + name = "parameter_279" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_280: + name = "parameter_280" + shape = [128, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_281: + name = "parameter_281" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_282: + name = "parameter_282" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_283: + name = "parameter_283" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_284: + name = "parameter_284" + shape = [128, 128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_285: + name = "parameter_285" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_286: + name = "parameter_286" + shape = [128, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_287: + name = "parameter_287" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_288: + name = "parameter_288" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_289: + name = "parameter_289" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_290: + name = "parameter_290" + shape = [512, 128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_291: + name = "parameter_291" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_292: + name = "parameter_292" + shape = [128, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_293: + name = "parameter_293" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_294: + name = "parameter_294" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_295: + name = "parameter_295" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_296: + name = "parameter_296" + shape = [128, 128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_297: + name = "parameter_297" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_298: + name = "parameter_298" + shape = [128, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_299: + name = "parameter_299" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_300: + name = "parameter_300" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_301: + name = "parameter_301" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_302: + name = "parameter_302" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_303: + name = "parameter_303" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_304: + name = "parameter_304" + shape = [128, 3, 4, 4] + dtype = "float32" + low = 0 + high = 0.5 + data = None diff --git a/paddle_samples/vision-model/SwinTransformer_base_patch4_window7_224/subgraph_1/graph_hash.txt b/paddle_samples/vision-model/SwinTransformer_base_patch4_window7_224/subgraph_1/graph_hash.txt new file mode 100644 index 00000000..678a18a1 --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_base_patch4_window7_224/subgraph_1/graph_hash.txt @@ -0,0 +1 @@ +b30ca9883649d991bc8c4ba04da935a9b4b73a36613bcc8529ef9a0a2fb8b1d3 \ No newline at end of file diff --git a/paddle_samples/vision-model/SwinTransformer_base_patch4_window7_224/subgraph_1/graph_net.json b/paddle_samples/vision-model/SwinTransformer_base_patch4_window7_224/subgraph_1/graph_net.json new file mode 100644 index 00000000..368ac6fc --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_base_patch4_window7_224/subgraph_1/graph_net.json @@ -0,0 +1,5 @@ +{ + "framework": "paddle", + "num_devices_required": 1, + "num_nodes_required": 1 +} \ No newline at end of file diff --git a/paddle_samples/vision-model/SwinTransformer_base_patch4_window7_224/subgraph_1/input_meta.py b/paddle_samples/vision-model/SwinTransformer_base_patch4_window7_224/subgraph_1/input_meta.py new file mode 100644 index 00000000..e81a4538 --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_base_patch4_window7_224/subgraph_1/input_meta.py @@ -0,0 +1,439 @@ +class Program_weight_tensor_data_0: + name = "data_0" + shape = [64, 3, 224, 224] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_1: + name = "data_1" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_2: + name = "data_2" + shape = [169, 4] + dtype = "float32" + low = -8.34937 + high = 5.14773 + data = None + + +class Program_weight_tensor_data_3: + name = "data_3" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_4: + name = "data_4" + shape = [169, 4] + dtype = "float32" + low = -6.26857 + high = 5.23506 + data = None + + +class Program_weight_tensor_data_5: + name = "data_5" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_6: + name = "data_6" + shape = [169, 8] + dtype = "float32" + low = -6.01897 + high = 7.46121 + data = None + + +class Program_weight_tensor_data_7: + name = "data_7" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_8: + name = "data_8" + shape = [169, 8] + dtype = "float32" + low = -7.64888 + high = 5.64611 + data = None + + +class Program_weight_tensor_data_9: + name = "data_9" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_10: + name = "data_10" + shape = [169, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_11: + name = "data_11" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_12: + name = "data_12" + shape = [169, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_13: + name = "data_13" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_14: + name = "data_14" + shape = [169, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_15: + name = "data_15" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_16: + name = "data_16" + shape = [169, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_17: + name = "data_17" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_18: + name = "data_18" + shape = [169, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_19: + name = "data_19" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_20: + name = "data_20" + shape = [169, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_21: + name = "data_21" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_22: + name = "data_22" + shape = [169, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_23: + name = "data_23" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_24: + name = "data_24" + shape = [169, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_25: + name = "data_25" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_26: + name = "data_26" + shape = [169, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_27: + name = "data_27" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_28: + name = "data_28" + shape = [169, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_29: + name = "data_29" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_30: + name = "data_30" + shape = [169, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_31: + name = "data_31" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_32: + name = "data_32" + shape = [169, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_33: + name = "data_33" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_34: + name = "data_34" + shape = [169, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_35: + name = "data_35" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_36: + name = "data_36" + shape = [169, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_37: + name = "data_37" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_38: + name = "data_38" + shape = [169, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_39: + name = "data_39" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_40: + name = "data_40" + shape = [169, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_41: + name = "data_41" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_42: + name = "data_42" + shape = [169, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_43: + name = "data_43" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_44: + name = "data_44" + shape = [169, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_45: + name = "data_45" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_46: + name = "data_46" + shape = [169, 32] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_47: + name = "data_47" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_48: + name = "data_48" + shape = [169, 32] + dtype = "float32" + low = 0 + high = 0.5 + data = None diff --git a/paddle_samples/vision-model/SwinTransformer_base_patch4_window7_224/subgraph_1/model.py b/paddle_samples/vision-model/SwinTransformer_base_patch4_window7_224/subgraph_1/model.py new file mode 100644 index 00000000..a84deeea --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_base_patch4_window7_224/subgraph_1/model.py @@ -0,0 +1,9747 @@ +import paddle + + +class GraphModule(paddle.nn.Layer): + def __init__(self): + super().__init__() + + def forward( + self, + parameter_0, + parameter_1, + parameter_2, + parameter_3, + parameter_4, + parameter_5, + parameter_6, + parameter_7, + parameter_8, + parameter_9, + parameter_10, + parameter_11, + parameter_12, + parameter_13, + parameter_14, + parameter_15, + parameter_16, + parameter_17, + parameter_18, + parameter_19, + parameter_20, + parameter_21, + parameter_22, + parameter_23, + parameter_24, + parameter_25, + parameter_26, + parameter_27, + parameter_28, + parameter_29, + parameter_30, + parameter_31, + parameter_32, + parameter_33, + parameter_34, + parameter_35, + parameter_36, + parameter_37, + parameter_38, + parameter_39, + parameter_40, + parameter_41, + parameter_42, + parameter_43, + parameter_44, + parameter_45, + parameter_46, + parameter_47, + parameter_48, + parameter_49, + parameter_50, + parameter_51, + parameter_52, + parameter_53, + parameter_54, + parameter_55, + parameter_56, + parameter_57, + parameter_58, + parameter_59, + parameter_60, + parameter_61, + parameter_62, + parameter_63, + parameter_64, + parameter_65, + parameter_66, + parameter_67, + parameter_68, + parameter_69, + parameter_70, + parameter_71, + parameter_72, + parameter_73, + parameter_74, + parameter_75, + parameter_76, + parameter_77, + parameter_78, + parameter_79, + parameter_80, + parameter_81, + parameter_82, + parameter_83, + parameter_84, + parameter_85, + parameter_86, + parameter_87, + parameter_88, + parameter_89, + parameter_90, + parameter_91, + parameter_92, + parameter_93, + parameter_94, + parameter_95, + parameter_96, + parameter_97, + parameter_98, + parameter_99, + parameter_100, + parameter_101, + parameter_102, + parameter_103, + parameter_104, + parameter_105, + parameter_106, + parameter_107, + parameter_108, + parameter_109, + parameter_110, + parameter_111, + parameter_112, + parameter_113, + parameter_114, + parameter_115, + parameter_116, + parameter_117, + parameter_118, + parameter_119, + parameter_120, + parameter_121, + parameter_122, + parameter_123, + parameter_124, + parameter_125, + parameter_126, + parameter_127, + parameter_128, + parameter_129, + parameter_130, + parameter_131, + parameter_132, + parameter_133, + parameter_134, + parameter_135, + parameter_136, + parameter_137, + parameter_138, + parameter_139, + parameter_140, + parameter_141, + parameter_142, + parameter_143, + parameter_144, + parameter_145, + parameter_146, + parameter_147, + parameter_148, + parameter_149, + parameter_150, + parameter_151, + parameter_152, + parameter_153, + parameter_154, + parameter_155, + parameter_156, + parameter_157, + parameter_158, + parameter_159, + parameter_160, + parameter_161, + parameter_162, + parameter_163, + parameter_164, + parameter_165, + parameter_166, + parameter_167, + parameter_168, + parameter_169, + parameter_170, + parameter_171, + parameter_172, + parameter_173, + parameter_174, + parameter_175, + parameter_176, + parameter_177, + parameter_178, + parameter_179, + parameter_180, + parameter_181, + parameter_182, + parameter_183, + parameter_184, + parameter_185, + parameter_186, + parameter_187, + parameter_188, + parameter_189, + parameter_190, + parameter_191, + parameter_192, + parameter_193, + parameter_194, + parameter_195, + parameter_196, + parameter_197, + parameter_198, + parameter_199, + parameter_200, + parameter_201, + parameter_202, + parameter_203, + parameter_204, + parameter_205, + parameter_206, + parameter_207, + parameter_208, + parameter_209, + parameter_210, + parameter_211, + parameter_212, + parameter_213, + parameter_214, + parameter_215, + parameter_216, + parameter_217, + parameter_218, + parameter_219, + parameter_220, + parameter_221, + parameter_222, + parameter_223, + parameter_224, + parameter_225, + parameter_226, + parameter_227, + parameter_228, + parameter_229, + parameter_230, + parameter_231, + parameter_232, + parameter_233, + parameter_234, + parameter_235, + parameter_236, + parameter_237, + parameter_238, + parameter_239, + parameter_240, + parameter_241, + parameter_242, + parameter_243, + parameter_244, + parameter_245, + parameter_246, + parameter_247, + parameter_248, + parameter_249, + parameter_250, + parameter_251, + parameter_252, + parameter_253, + parameter_254, + parameter_255, + parameter_256, + parameter_257, + parameter_258, + parameter_259, + parameter_260, + parameter_261, + parameter_262, + parameter_263, + parameter_264, + parameter_265, + parameter_266, + parameter_267, + parameter_268, + parameter_269, + parameter_270, + parameter_271, + parameter_272, + parameter_273, + parameter_274, + parameter_275, + parameter_276, + parameter_277, + parameter_278, + parameter_279, + parameter_280, + parameter_281, + parameter_282, + parameter_283, + parameter_284, + parameter_285, + parameter_286, + parameter_287, + parameter_288, + parameter_289, + parameter_290, + parameter_291, + parameter_292, + parameter_293, + parameter_294, + parameter_295, + parameter_296, + parameter_297, + parameter_298, + parameter_299, + parameter_300, + parameter_301, + parameter_302, + parameter_303, + parameter_304, + data_0, + data_1, + data_2, + data_3, + data_4, + data_5, + data_6, + data_7, + data_8, + data_9, + data_10, + data_11, + data_12, + data_13, + data_14, + data_15, + data_16, + data_17, + data_18, + data_19, + data_20, + data_21, + data_22, + data_23, + data_24, + data_25, + data_26, + data_27, + data_28, + data_29, + data_30, + data_31, + data_32, + data_33, + data_34, + data_35, + data_36, + data_37, + data_38, + data_39, + data_40, + data_41, + data_42, + data_43, + data_44, + data_45, + data_46, + data_47, + data_48, + ): + # pd_op.shape64: (4xi64) <- (-1x3x224x224xf32) + shape64_0 = paddle._C_ops.shape64(data_0) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_0 = [0] + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_1 = [1] + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_0 = paddle._C_ops.slice( + shape64_0, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_0 + + # pd_op.conv2d: (-1x128x56x56xf32) <- (-1x3x224x224xf32, 128x3x4x4xf32) + conv2d_0 = paddle._C_ops.conv2d( + data_0, parameter_304, [4, 4], [0, 0], "EXPLICIT", [1, 1], 1, "NCHW" + ) + del data_0, parameter_304 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_2 = [1, -1, 1, 1] + + # pd_op.reshape: (1x128x1x1xf32) <- (128xf32, 4xi64) + reshape_0 = paddle._C_ops.reshape(parameter_303, full_int_array_2) + del full_int_array_2, parameter_303 + + # pd_op.add: (-1x128x56x56xf32) <- (-1x128x56x56xf32, 1x128x1x1xf32) + add_1 = paddle._C_ops.add(conv2d_0, reshape_0) + del conv2d_0, reshape_0 + + # pd_op.shape64: (4xi64) <- (-1x128x56x56xf32) + shape64_1 = paddle._C_ops.shape64(add_1) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_1 = paddle._C_ops.slice( + shape64_1, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_1 + + # pd_op.flatten: (-1x128x3136xf32) <- (-1x128x56x56xf32) + flatten_0 = paddle._C_ops.flatten(add_1, 2, 3) + del add_1 + + # pd_op.transpose: (-1x3136x128xf32) <- (-1x128x3136xf32) + transpose_0 = paddle._C_ops.transpose(flatten_0, [0, 2, 1]) + del flatten_0 + + # pd_op.layer_norm: (-1x3136x128xf32, -1x3136xf32, -1x3136xf32) <- (-1x3136x128xf32, 128xf32, 128xf32) + layer_norm_0, layer_norm_1, layer_norm_2 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + transpose_0, parameter_302, parameter_301, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_301, parameter_302, transpose_0 + + # pd_op.shape64: (3xi64) <- (-1x3136x128xf32) + shape64_2 = paddle._C_ops.shape64(layer_norm_0) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_2 = paddle._C_ops.slice( + shape64_2, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_2 + + # pd_op.layer_norm: (-1x3136x128xf32, -1x3136xf32, -1x3136xf32) <- (-1x3136x128xf32, 128xf32, 128xf32) + layer_norm_3, layer_norm_4, layer_norm_5 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + layer_norm_0, parameter_300, parameter_299, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_299, parameter_300 + + # pd_op.full: (xi64) <- () + full_0 = paddle._C_ops.full( + [], float("56"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_1 = paddle._C_ops.full( + [], float("128"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_0 = [slice_2, full_0, full_0, full_1] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_0 = paddle._C_ops.stack(combine_0, 0) + del combine_0 + + # pd_op.reshape: (-1x56x56x128xf32) <- (-1x3136x128xf32, 4xi64) + reshape_1 = paddle._C_ops.reshape(layer_norm_3, stack_0) + del layer_norm_3, stack_0 + + # pd_op.shape64: (4xi64) <- (-1x56x56x128xf32) + shape64_3 = paddle._C_ops.shape64(reshape_1) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_3 = paddle._C_ops.slice( + shape64_3, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_3 + + # pd_op.full: (xi64) <- () + full_2 = paddle._C_ops.full( + [], float("8"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_3 = paddle._C_ops.full( + [], float("7"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_1 = [slice_3, full_2, full_3, full_2, full_3, full_1] + del slice_3 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_1 = paddle._C_ops.stack(combine_1, 0) + del combine_1 + + # pd_op.reshape: (-1x8x7x8x7x128xf32) <- (-1x56x56x128xf32, 6xi64) + reshape_2 = paddle._C_ops.reshape(reshape_1, stack_1) + del reshape_1, stack_1 + + # pd_op.transpose: (-1x8x8x7x7x128xf32) <- (-1x8x7x8x7x128xf32) + transpose_1 = paddle._C_ops.transpose(reshape_2, [0, 1, 3, 2, 4, 5]) + del reshape_2 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_3 = [-1, 7, 7, 128] + + # pd_op.reshape: (-1x7x7x128xf32) <- (-1x8x8x7x7x128xf32, 4xi64) + reshape_3 = paddle._C_ops.reshape(transpose_1, full_int_array_3) + del transpose_1 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_4 = [-1, 49, 128] + + # pd_op.reshape: (-1x49x128xf32) <- (-1x7x7x128xf32, 3xi64) + reshape_4 = paddle._C_ops.reshape(reshape_3, full_int_array_4) + del reshape_3 + + # pd_op.shape64: (3xi64) <- (-1x49x128xf32) + shape64_4 = paddle._C_ops.shape64(reshape_4) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_4 = paddle._C_ops.slice( + shape64_4, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_4 + + # pd_op.matmul: (-1x49x384xf32) <- (-1x49x128xf32, 128x384xf32) + matmul_0 = paddle._C_ops.matmul(reshape_4, parameter_298, False, False) + del parameter_298, reshape_4 + + # pd_op.add: (-1x49x384xf32) <- (-1x49x384xf32, 384xf32) + add_2 = paddle._C_ops.add(matmul_0, parameter_297) + del matmul_0, parameter_297 + + # pd_op.full: (xi64) <- () + full_4 = paddle._C_ops.full( + [], float("49"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_5 = paddle._C_ops.full( + [], float("3"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_6 = paddle._C_ops.full( + [], float("4"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_7 = paddle._C_ops.full( + [], float("32"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_2 = [slice_4, full_4, full_5, full_6, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_2 = paddle._C_ops.stack(combine_2, 0) + del combine_2 + + # pd_op.reshape: (-1x49x3x4x32xf32) <- (-1x49x384xf32, 5xi64) + reshape_5 = paddle._C_ops.reshape(add_2, stack_2) + del add_2, stack_2 + + # pd_op.transpose: (3x-1x4x49x32xf32) <- (-1x49x3x4x32xf32) + transpose_2 = paddle._C_ops.transpose(reshape_5, [2, 0, 3, 1, 4]) + del reshape_5 + + # pd_op.slice: (-1x4x49x32xf32) <- (3x-1x4x49x32xf32, 1xi64, 1xi64) + slice_5 = paddle._C_ops.slice( + transpose_2, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_5 = [2] + + # pd_op.slice: (-1x4x49x32xf32) <- (3x-1x4x49x32xf32, 1xi64, 1xi64) + slice_6 = paddle._C_ops.slice( + transpose_2, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_6 = [3] + + # pd_op.slice: (-1x4x49x32xf32) <- (3x-1x4x49x32xf32, 1xi64, 1xi64) + slice_7 = paddle._C_ops.slice( + transpose_2, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_2 + + # pd_op.full: (1xf32) <- () + full_8 = paddle._C_ops.full( + [1], float("0.176777"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (-1x4x49x32xf32) <- (-1x4x49x32xf32, 1xf32) + scale_0 = paddle._C_ops.scale(slice_5, full_8, float("0"), True) + del slice_5 + + # pd_op.transpose: (-1x4x32x49xf32) <- (-1x4x49x32xf32) + transpose_3 = paddle._C_ops.transpose(slice_6, [0, 1, 3, 2]) + del slice_6 + + # pd_op.matmul: (-1x4x49x49xf32) <- (-1x4x49x32xf32, -1x4x32x49xf32) + matmul_1 = paddle._C_ops.matmul(scale_0, transpose_3, False, False) + del scale_0, transpose_3 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_7 = [-1] + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_6 = paddle._C_ops.reshape(data_1, full_int_array_7) + del data_1 + + # pd_op.index_select: (2401x4xf32) <- (169x4xf32, 2401xi64) + index_select_0 = paddle._C_ops.index_select(data_2, reshape_6, 0) + del data_2, reshape_6 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_8 = [49, 49, -1] + + # pd_op.reshape: (49x49x4xf32) <- (2401x4xf32, 3xi64) + reshape_7 = paddle._C_ops.reshape(index_select_0, full_int_array_8) + del index_select_0 + + # pd_op.transpose: (4x49x49xf32) <- (49x49x4xf32) + transpose_4 = paddle._C_ops.transpose(reshape_7, [2, 0, 1]) + del reshape_7 + + # pd_op.unsqueeze: (1x4x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_0 = paddle._C_ops.unsqueeze(transpose_4, full_int_array_0) + del transpose_4 + + # pd_op.add: (-1x4x49x49xf32) <- (-1x4x49x49xf32, 1x4x49x49xf32) + add_3 = paddle._C_ops.add(matmul_1, unsqueeze_0) + del matmul_1, unsqueeze_0 + + # pd_op.softmax: (-1x4x49x49xf32) <- (-1x4x49x49xf32) + softmax_0 = paddle._C_ops.softmax(add_3, -1) + del add_3 + + # pd_op.matmul: (-1x4x49x32xf32) <- (-1x4x49x49xf32, -1x4x49x32xf32) + matmul_2 = paddle._C_ops.matmul(softmax_0, slice_7, False, False) + del slice_7, softmax_0 + + # pd_op.transpose: (-1x49x4x32xf32) <- (-1x4x49x32xf32) + transpose_5 = paddle._C_ops.transpose(matmul_2, [0, 2, 1, 3]) + del matmul_2 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_3 = [slice_4, full_4, full_1] + del slice_4 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_3 = paddle._C_ops.stack(combine_3, 0) + del combine_3 + + # pd_op.reshape: (-1x49x128xf32) <- (-1x49x4x32xf32, 3xi64) + reshape_8 = paddle._C_ops.reshape(transpose_5, stack_3) + del stack_3, transpose_5 + + # pd_op.matmul: (-1x49x128xf32) <- (-1x49x128xf32, 128x128xf32) + matmul_3 = paddle._C_ops.matmul(reshape_8, parameter_296, False, False) + del parameter_296, reshape_8 + + # pd_op.add: (-1x49x128xf32) <- (-1x49x128xf32, 128xf32) + add_4 = paddle._C_ops.add(matmul_3, parameter_295) + del matmul_3, parameter_295 + + # pd_op.reshape: (-1x7x7x128xf32) <- (-1x49x128xf32, 4xi64) + reshape_9 = paddle._C_ops.reshape(add_4, full_int_array_3) + del add_4 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_9 = [-1, 8, 8, 7, 7, 128] + + # pd_op.reshape: (-1x8x8x7x7x128xf32) <- (-1x7x7x128xf32, 6xi64) + reshape_10 = paddle._C_ops.reshape(reshape_9, full_int_array_9) + del reshape_9 + + # pd_op.transpose: (-1x8x7x8x7x128xf32) <- (-1x8x8x7x7x128xf32) + transpose_6 = paddle._C_ops.transpose(reshape_10, [0, 1, 3, 2, 4, 5]) + del reshape_10 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_10 = [-1, 56, 56, 128] + + # pd_op.reshape: (-1x56x56x128xf32) <- (-1x8x7x8x7x128xf32, 4xi64) + reshape_11 = paddle._C_ops.reshape(transpose_6, full_int_array_10) + del transpose_6 + + # pd_op.full: (xi64) <- () + full_9 = paddle._C_ops.full( + [], float("3136"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_4 = [slice_2, full_9, full_1] + del slice_2 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_4 = paddle._C_ops.stack(combine_4, 0) + del combine_4 + + # pd_op.reshape: (-1x3136x128xf32) <- (-1x56x56x128xf32, 3xi64) + reshape_12 = paddle._C_ops.reshape(reshape_11, stack_4) + del reshape_11, stack_4 + + # pd_op.add: (-1x3136x128xf32) <- (-1x3136x128xf32, -1x3136x128xf32) + add_5 = paddle._C_ops.add(layer_norm_0, reshape_12) + del layer_norm_0, reshape_12 + + # pd_op.layer_norm: (-1x3136x128xf32, -1x3136xf32, -1x3136xf32) <- (-1x3136x128xf32, 128xf32, 128xf32) + layer_norm_6, layer_norm_7, layer_norm_8 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_5, parameter_294, parameter_293, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_293, parameter_294 + + # pd_op.matmul: (-1x3136x512xf32) <- (-1x3136x128xf32, 128x512xf32) + matmul_4 = paddle._C_ops.matmul(layer_norm_6, parameter_292, False, False) + del layer_norm_6, parameter_292 + + # pd_op.add: (-1x3136x512xf32) <- (-1x3136x512xf32, 512xf32) + add_6 = paddle._C_ops.add(matmul_4, parameter_291) + del matmul_4, parameter_291 + + # pd_op.gelu: (-1x3136x512xf32) <- (-1x3136x512xf32) + gelu_0 = paddle._C_ops.gelu(add_6, False) + del add_6 + + # pd_op.matmul: (-1x3136x128xf32) <- (-1x3136x512xf32, 512x128xf32) + matmul_5 = paddle._C_ops.matmul(gelu_0, parameter_290, False, False) + del gelu_0, parameter_290 + + # pd_op.add: (-1x3136x128xf32) <- (-1x3136x128xf32, 128xf32) + add_7 = paddle._C_ops.add(matmul_5, parameter_289) + del matmul_5, parameter_289 + + # pd_op.add: (-1x3136x128xf32) <- (-1x3136x128xf32, -1x3136x128xf32) + add_8 = paddle._C_ops.add(add_5, add_7) + del add_5, add_7 + + # pd_op.shape64: (3xi64) <- (-1x3136x128xf32) + shape64_5 = paddle._C_ops.shape64(add_8) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_8 = paddle._C_ops.slice( + shape64_5, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_5 + + # pd_op.layer_norm: (-1x3136x128xf32, -1x3136xf32, -1x3136xf32) <- (-1x3136x128xf32, 128xf32, 128xf32) + layer_norm_9, layer_norm_10, layer_norm_11 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_8, parameter_288, parameter_287, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_287, parameter_288 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_5 = [slice_8, full_0, full_0, full_1] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_5 = paddle._C_ops.stack(combine_5, 0) + del combine_5 + + # pd_op.reshape: (-1x56x56x128xf32) <- (-1x3136x128xf32, 4xi64) + reshape_13 = paddle._C_ops.reshape(layer_norm_9, stack_5) + del layer_norm_9, stack_5 + + # pd_op.shape64: (4xi64) <- (-1x56x56x128xf32) + shape64_6 = paddle._C_ops.shape64(reshape_13) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_9 = paddle._C_ops.slice( + shape64_6, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_6 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_11 = [-3, -3] + + # pd_op.roll: (-1x56x56x128xf32) <- (-1x56x56x128xf32, 2xi64) + roll_0 = paddle._C_ops.roll(reshape_13, full_int_array_11, [1, 2]) + del reshape_13 + + # pd_op.shape64: (4xi64) <- (-1x56x56x128xf32) + shape64_7 = paddle._C_ops.shape64(roll_0) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_10 = paddle._C_ops.slice( + shape64_7, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_7 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_6 = [slice_10, full_2, full_3, full_2, full_3, full_1] + del slice_10 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_6 = paddle._C_ops.stack(combine_6, 0) + del combine_6 + + # pd_op.reshape: (-1x8x7x8x7x128xf32) <- (-1x56x56x128xf32, 6xi64) + reshape_14 = paddle._C_ops.reshape(roll_0, stack_6) + del roll_0, stack_6 + + # pd_op.transpose: (-1x8x8x7x7x128xf32) <- (-1x8x7x8x7x128xf32) + transpose_7 = paddle._C_ops.transpose(reshape_14, [0, 1, 3, 2, 4, 5]) + del reshape_14 + + # pd_op.reshape: (-1x7x7x128xf32) <- (-1x8x8x7x7x128xf32, 4xi64) + reshape_15 = paddle._C_ops.reshape(transpose_7, full_int_array_3) + del transpose_7 + + # pd_op.reshape: (-1x49x128xf32) <- (-1x7x7x128xf32, 3xi64) + reshape_16 = paddle._C_ops.reshape(reshape_15, full_int_array_4) + del full_int_array_4, reshape_15 + + # pd_op.full: (1x56x56x1xf32) <- () + full_10 = paddle._C_ops.full( + [1, 56, 56, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_12 = [0, 0] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_13 = [-7, -7] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_14 = [1, 1] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__12 = paddle._C_ops.set_value_( + full_10, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_10 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_15 = [0, -7] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_16 = [-7, -3] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__13 = paddle._C_ops.set_value_( + set_value__12, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__12 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_17 = [0, -3] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_18 = [-7, 2147483647] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__14 = paddle._C_ops.set_value_( + set_value__13, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__13 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_19 = [-7, 0] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_20 = [-3, -7] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__15 = paddle._C_ops.set_value_( + set_value__14, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__14 + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__16 = paddle._C_ops.set_value_( + set_value__15, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__15 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_21 = [-3, 2147483647] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__17 = paddle._C_ops.set_value_( + set_value__16, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__16 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_22 = [-3, 0] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_23 = [2147483647, -7] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__18 = paddle._C_ops.set_value_( + set_value__17, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__17 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_24 = [2147483647, -3] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__19 = paddle._C_ops.set_value_( + set_value__18, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__18 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_25 = [2147483647, 2147483647] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__0 = paddle._C_ops.set_value_( + set_value__19, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__19 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_26 = [1, 8, 7, 8, 7, 1] + + # pd_op.reshape: (1x8x7x8x7x1xf32) <- (1x56x56x1xf32, 6xi64) + reshape_17 = paddle._C_ops.reshape(set_value__0, full_int_array_26) + del full_int_array_26 + + # pd_op.transpose: (1x8x8x7x7x1xf32) <- (1x8x7x8x7x1xf32) + transpose_8 = paddle._C_ops.transpose(reshape_17, [0, 1, 3, 2, 4, 5]) + del reshape_17 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_27 = [-1, 7, 7, 1] + + # pd_op.reshape: (64x7x7x1xf32) <- (1x8x8x7x7x1xf32, 4xi64) + reshape_18 = paddle._C_ops.reshape(transpose_8, full_int_array_27) + del transpose_8 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_28 = [-1, 49] + + # pd_op.reshape: (64x49xf32) <- (64x7x7x1xf32, 2xi64) + reshape_19 = paddle._C_ops.reshape(reshape_18, full_int_array_28) + del reshape_18 + + # pd_op.unsqueeze: (64x1x49xf32) <- (64x49xf32, 1xi64) + unsqueeze_1 = paddle._C_ops.unsqueeze(reshape_19, full_int_array_1) + + # pd_op.unsqueeze: (64x49x1xf32) <- (64x49xf32, 1xi64) + unsqueeze_2 = paddle._C_ops.unsqueeze(reshape_19, full_int_array_5) + del reshape_19 + + # pd_op.subtract: (64x49x49xf32) <- (64x1x49xf32, 64x49x1xf32) + subtract_0 = paddle._C_ops.subtract(unsqueeze_1, unsqueeze_2) + del unsqueeze_1, unsqueeze_2 + + # pd_op.full: (xf32) <- () + full_11 = paddle._C_ops.full( + [], float("0"), paddle.float32, paddle.framework._current_expected_place() + ) + + # pd_op.not_equal: (64x49x49xb) <- (64x49x49xf32, xf32) + not_equal_0 = paddle._C_ops.not_equal(subtract_0, full_11) + + # pd_op.full: (64x49x49xf32) <- () + full_12 = paddle._C_ops.full( + [64, 49, 49], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (64x49x49xf32) <- (64x49x49xb, 64x49x49xf32, 64x49x49xf32) + where_0 = paddle._C_ops.where(not_equal_0, full_12, subtract_0) + del full_12, not_equal_0, subtract_0 + + # pd_op.equal: (64x49x49xb) <- (64x49x49xf32, xf32) + equal_0 = paddle._C_ops.equal(where_0, full_11) + + # pd_op.full: (64x49x49xf32) <- () + full_13 = paddle._C_ops.full( + [64, 49, 49], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (64x49x49xf32) <- (64x49x49xb, 64x49x49xf32, 64x49x49xf32) + where_1 = paddle._C_ops.where(equal_0, full_13, where_0) + del equal_0, full_13, where_0 + + # pd_op.shape64: (3xi64) <- (-1x49x128xf32) + shape64_8 = paddle._C_ops.shape64(reshape_16) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_11 = paddle._C_ops.slice( + shape64_8, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_8 + + # pd_op.matmul: (-1x49x384xf32) <- (-1x49x128xf32, 128x384xf32) + matmul_6 = paddle._C_ops.matmul(reshape_16, parameter_286, False, False) + del parameter_286, reshape_16 + + # pd_op.add: (-1x49x384xf32) <- (-1x49x384xf32, 384xf32) + add_9 = paddle._C_ops.add(matmul_6, parameter_285) + del matmul_6, parameter_285 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_7 = [slice_11, full_4, full_5, full_6, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_7 = paddle._C_ops.stack(combine_7, 0) + del combine_7 + + # pd_op.reshape: (-1x49x3x4x32xf32) <- (-1x49x384xf32, 5xi64) + reshape_20 = paddle._C_ops.reshape(add_9, stack_7) + del add_9, stack_7 + + # pd_op.transpose: (3x-1x4x49x32xf32) <- (-1x49x3x4x32xf32) + transpose_9 = paddle._C_ops.transpose(reshape_20, [2, 0, 3, 1, 4]) + del reshape_20 + + # pd_op.slice: (-1x4x49x32xf32) <- (3x-1x4x49x32xf32, 1xi64, 1xi64) + slice_12 = paddle._C_ops.slice( + transpose_9, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x4x49x32xf32) <- (3x-1x4x49x32xf32, 1xi64, 1xi64) + slice_13 = paddle._C_ops.slice( + transpose_9, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x4x49x32xf32) <- (3x-1x4x49x32xf32, 1xi64, 1xi64) + slice_14 = paddle._C_ops.slice( + transpose_9, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_9 + + # pd_op.scale: (-1x4x49x32xf32) <- (-1x4x49x32xf32, 1xf32) + scale_1 = paddle._C_ops.scale(slice_12, full_8, float("0"), True) + del slice_12 + + # pd_op.transpose: (-1x4x32x49xf32) <- (-1x4x49x32xf32) + transpose_10 = paddle._C_ops.transpose(slice_13, [0, 1, 3, 2]) + del slice_13 + + # pd_op.matmul: (-1x4x49x49xf32) <- (-1x4x49x32xf32, -1x4x32x49xf32) + matmul_7 = paddle._C_ops.matmul(scale_1, transpose_10, False, False) + del scale_1, transpose_10 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_21 = paddle._C_ops.reshape(data_3, full_int_array_7) + del data_3 + + # pd_op.index_select: (2401x4xf32) <- (169x4xf32, 2401xi64) + index_select_1 = paddle._C_ops.index_select(data_4, reshape_21, 0) + del data_4, reshape_21 + + # pd_op.reshape: (49x49x4xf32) <- (2401x4xf32, 3xi64) + reshape_22 = paddle._C_ops.reshape(index_select_1, full_int_array_8) + del index_select_1 + + # pd_op.transpose: (4x49x49xf32) <- (49x49x4xf32) + transpose_11 = paddle._C_ops.transpose(reshape_22, [2, 0, 1]) + del reshape_22 + + # pd_op.unsqueeze: (1x4x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_3 = paddle._C_ops.unsqueeze(transpose_11, full_int_array_0) + del transpose_11 + + # pd_op.add: (-1x4x49x49xf32) <- (-1x4x49x49xf32, 1x4x49x49xf32) + add_10 = paddle._C_ops.add(matmul_7, unsqueeze_3) + del matmul_7, unsqueeze_3 + + # pd_op.full: (xi64) <- () + full_14 = paddle._C_ops.full( + [], float("64"), paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_0 = paddle._C_ops.floor_divide(slice_11, full_14) + del full_14 + + # pd_op.full: (xi64) <- () + full_15 = paddle._C_ops.full( + [], float("64"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_8 = [floor_divide_0, full_15, full_6, full_4, full_4] + del floor_divide_0, full_15 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_8 = paddle._C_ops.stack(combine_8, 0) + del combine_8 + + # pd_op.reshape: (-1x64x4x49x49xf32) <- (-1x4x49x49xf32, 5xi64) + reshape_23 = paddle._C_ops.reshape(add_10, stack_8) + del add_10, stack_8 + + # pd_op.unsqueeze: (64x1x49x49xf32) <- (64x49x49xf32, 1xi64) + unsqueeze_4 = paddle._C_ops.unsqueeze(where_1, full_int_array_1) + del where_1 + + # pd_op.unsqueeze: (1x64x1x49x49xf32) <- (64x1x49x49xf32, 1xi64) + unsqueeze_5 = paddle._C_ops.unsqueeze(unsqueeze_4, full_int_array_0) + del unsqueeze_4 + + # pd_op.add: (-1x64x4x49x49xf32) <- (-1x64x4x49x49xf32, 1x64x1x49x49xf32) + add_11 = paddle._C_ops.add(reshape_23, unsqueeze_5) + del reshape_23, unsqueeze_5 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_9 = [slice_11, full_6, full_4, full_4] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_9 = paddle._C_ops.stack(combine_9, 0) + del combine_9 + + # pd_op.reshape: (-1x4x49x49xf32) <- (-1x64x4x49x49xf32, 4xi64) + reshape_24 = paddle._C_ops.reshape(add_11, stack_9) + del add_11, stack_9 + + # pd_op.softmax: (-1x4x49x49xf32) <- (-1x4x49x49xf32) + softmax_1 = paddle._C_ops.softmax(reshape_24, -1) + del reshape_24 + + # pd_op.matmul: (-1x4x49x32xf32) <- (-1x4x49x49xf32, -1x4x49x32xf32) + matmul_8 = paddle._C_ops.matmul(softmax_1, slice_14, False, False) + del slice_14, softmax_1 + + # pd_op.transpose: (-1x49x4x32xf32) <- (-1x4x49x32xf32) + transpose_12 = paddle._C_ops.transpose(matmul_8, [0, 2, 1, 3]) + del matmul_8 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_10 = [slice_11, full_4, full_1] + del slice_11 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_10 = paddle._C_ops.stack(combine_10, 0) + del combine_10 + + # pd_op.reshape: (-1x49x128xf32) <- (-1x49x4x32xf32, 3xi64) + reshape_25 = paddle._C_ops.reshape(transpose_12, stack_10) + del stack_10, transpose_12 + + # pd_op.matmul: (-1x49x128xf32) <- (-1x49x128xf32, 128x128xf32) + matmul_9 = paddle._C_ops.matmul(reshape_25, parameter_284, False, False) + del parameter_284, reshape_25 + + # pd_op.add: (-1x49x128xf32) <- (-1x49x128xf32, 128xf32) + add_12 = paddle._C_ops.add(matmul_9, parameter_283) + del matmul_9, parameter_283 + + # pd_op.reshape: (-1x7x7x128xf32) <- (-1x49x128xf32, 4xi64) + reshape_26 = paddle._C_ops.reshape(add_12, full_int_array_3) + del add_12, full_int_array_3 + + # pd_op.reshape: (-1x8x8x7x7x128xf32) <- (-1x7x7x128xf32, 6xi64) + reshape_27 = paddle._C_ops.reshape(reshape_26, full_int_array_9) + del full_int_array_9, reshape_26 + + # pd_op.transpose: (-1x8x7x8x7x128xf32) <- (-1x8x8x7x7x128xf32) + transpose_13 = paddle._C_ops.transpose(reshape_27, [0, 1, 3, 2, 4, 5]) + del reshape_27 + + # pd_op.reshape: (-1x56x56x128xf32) <- (-1x8x7x8x7x128xf32, 4xi64) + reshape_28 = paddle._C_ops.reshape(transpose_13, full_int_array_10) + del full_int_array_10, transpose_13 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_29 = [3, 3] + + # pd_op.roll: (-1x56x56x128xf32) <- (-1x56x56x128xf32, 2xi64) + roll_1 = paddle._C_ops.roll(reshape_28, full_int_array_29, [1, 2]) + del reshape_28 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_11 = [slice_8, full_9, full_1] + del full_9, slice_8 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_11 = paddle._C_ops.stack(combine_11, 0) + del combine_11 + + # pd_op.reshape: (-1x3136x128xf32) <- (-1x56x56x128xf32, 3xi64) + reshape_29 = paddle._C_ops.reshape(roll_1, stack_11) + del roll_1, stack_11 + + # pd_op.add: (-1x3136x128xf32) <- (-1x3136x128xf32, -1x3136x128xf32) + add_13 = paddle._C_ops.add(add_8, reshape_29) + del add_8, reshape_29 + + # pd_op.layer_norm: (-1x3136x128xf32, -1x3136xf32, -1x3136xf32) <- (-1x3136x128xf32, 128xf32, 128xf32) + layer_norm_12, layer_norm_13, layer_norm_14 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_13, parameter_282, parameter_281, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_281, parameter_282 + + # pd_op.matmul: (-1x3136x512xf32) <- (-1x3136x128xf32, 128x512xf32) + matmul_10 = paddle._C_ops.matmul(layer_norm_12, parameter_280, False, False) + del layer_norm_12, parameter_280 + + # pd_op.add: (-1x3136x512xf32) <- (-1x3136x512xf32, 512xf32) + add_14 = paddle._C_ops.add(matmul_10, parameter_279) + del matmul_10, parameter_279 + + # pd_op.gelu: (-1x3136x512xf32) <- (-1x3136x512xf32) + gelu_1 = paddle._C_ops.gelu(add_14, False) + del add_14 + + # pd_op.matmul: (-1x3136x128xf32) <- (-1x3136x512xf32, 512x128xf32) + matmul_11 = paddle._C_ops.matmul(gelu_1, parameter_278, False, False) + del gelu_1, parameter_278 + + # pd_op.add: (-1x3136x128xf32) <- (-1x3136x128xf32, 128xf32) + add_15 = paddle._C_ops.add(matmul_11, parameter_277) + del matmul_11, parameter_277 + + # pd_op.add: (-1x3136x128xf32) <- (-1x3136x128xf32, -1x3136x128xf32) + add_16 = paddle._C_ops.add(add_13, add_15) + del add_13, add_15 + + # pd_op.shape64: (3xi64) <- (-1x3136x128xf32) + shape64_9 = paddle._C_ops.shape64(add_16) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_15 = paddle._C_ops.slice( + shape64_9, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_9 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_12 = [slice_15, full_0, full_0, full_1] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_12 = paddle._C_ops.stack(combine_12, 0) + del combine_12 + + # pd_op.reshape: (-1x56x56x128xf32) <- (-1x3136x128xf32, 4xi64) + reshape_30 = paddle._C_ops.reshape(add_16, stack_12) + del add_16, stack_12 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_30 = [2, 2] + + # pd_op.strided_slice: (-1x28x28x128xf32) <- (-1x56x56x128xf32, 2xi64, 2xi64, 2xi64) + strided_slice_0 = paddle._C_ops.strided_slice( + reshape_30, [1, 2], full_int_array_12, full_int_array_25, full_int_array_30 + ) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_31 = [1, 0] + + # pd_op.strided_slice: (-1x28x28x128xf32) <- (-1x56x56x128xf32, 2xi64, 2xi64, 2xi64) + strided_slice_1 = paddle._C_ops.strided_slice( + reshape_30, [1, 2], full_int_array_31, full_int_array_25, full_int_array_30 + ) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_32 = [0, 1] + + # pd_op.strided_slice: (-1x28x28x128xf32) <- (-1x56x56x128xf32, 2xi64, 2xi64, 2xi64) + strided_slice_2 = paddle._C_ops.strided_slice( + reshape_30, [1, 2], full_int_array_32, full_int_array_25, full_int_array_30 + ) + + # pd_op.strided_slice: (-1x28x28x128xf32) <- (-1x56x56x128xf32, 2xi64, 2xi64, 2xi64) + strided_slice_3 = paddle._C_ops.strided_slice( + reshape_30, [1, 2], full_int_array_14, full_int_array_25, full_int_array_30 + ) + + # pd_op.shape64: (4xi64) <- (-1x56x56x128xf32) + shape64_10 = paddle._C_ops.shape64(reshape_30) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_16 = paddle._C_ops.slice( + shape64_10, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_10 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_13 = [slice_16, full_0, full_0, full_1] + del full_0, full_1, slice_16 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_13 = paddle._C_ops.stack(combine_13, 0) + del combine_13 + + # pd_op.reshape: (-1x56x56x128xf32) <- (-1x56x56x128xf32, 4xi64) + reshape_31 = paddle._C_ops.reshape(reshape_30, stack_13) + del reshape_30, stack_13 + + # pd_op.full: (1xi32) <- () + full_16 = paddle._C_ops.full( + [1], float("-1"), paddle.int32, paddle.core.CPUPlace() + ) + + # builtin.combine: ([-1x28x28x128xf32, -1x28x28x128xf32, -1x28x28x128xf32, -1x28x28x128xf32]) <- (-1x28x28x128xf32, -1x28x28x128xf32, -1x28x28x128xf32, -1x28x28x128xf32) + combine_14 = [ + strided_slice_0, + strided_slice_1, + strided_slice_2, + strided_slice_3, + ] + del strided_slice_0, strided_slice_1, strided_slice_2, strided_slice_3 + + # pd_op.concat: (-1x28x28x512xf32) <- ([-1x28x28x128xf32, -1x28x28x128xf32, -1x28x28x128xf32, -1x28x28x128xf32], 1xi32) + concat_0 = paddle._C_ops.concat(combine_14, full_16) + del combine_14 + + # pd_op.full: (xi64) <- () + full_17 = paddle._C_ops.full( + [], float("-1"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_18 = paddle._C_ops.full( + [], float("512"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_15 = [slice_15, full_17, full_18] + del slice_15 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_14 = paddle._C_ops.stack(combine_15, 0) + del combine_15 + + # pd_op.reshape: (-1x-1x512xf32) <- (-1x28x28x512xf32, 3xi64) + reshape_32 = paddle._C_ops.reshape(concat_0, stack_14) + del concat_0, stack_14 + + # pd_op.layer_norm: (-1x-1x512xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x512xf32, 512xf32, 512xf32) + layer_norm_15, layer_norm_16, layer_norm_17 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + reshape_32, parameter_276, parameter_275, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_275, parameter_276, reshape_32 + + # pd_op.matmul: (-1x-1x256xf32) <- (-1x-1x512xf32, 512x256xf32) + matmul_12 = paddle._C_ops.matmul(layer_norm_15, parameter_274, False, False) + del layer_norm_15, parameter_274 + + # pd_op.shape64: (3xi64) <- (-1x-1x256xf32) + shape64_11 = paddle._C_ops.shape64(matmul_12) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_17 = paddle._C_ops.slice( + shape64_11, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_11 + + # pd_op.shape64: (3xi64) <- (-1x-1x256xf32) + shape64_12 = paddle._C_ops.shape64(matmul_12) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_18 = paddle._C_ops.slice( + shape64_12, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + del shape64_12 + + # pd_op.layer_norm: (-1x-1x256xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x256xf32, 256xf32, 256xf32) + layer_norm_18, layer_norm_19, layer_norm_20 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + matmul_12, parameter_273, parameter_272, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_272, parameter_273 + + # pd_op.full: (xi64) <- () + full_19 = paddle._C_ops.full( + [], float("28"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_20 = paddle._C_ops.full( + [], float("256"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_16 = [slice_17, full_19, full_19, full_20] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_15 = paddle._C_ops.stack(combine_16, 0) + del combine_16 + + # pd_op.reshape: (-1x28x28x256xf32) <- (-1x-1x256xf32, 4xi64) + reshape_33 = paddle._C_ops.reshape(layer_norm_18, stack_15) + del layer_norm_18, stack_15 + + # pd_op.shape64: (4xi64) <- (-1x28x28x256xf32) + shape64_13 = paddle._C_ops.shape64(reshape_33) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_19 = paddle._C_ops.slice( + shape64_13, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_13 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_17 = [slice_19, full_6, full_3, full_6, full_3, full_20] + del slice_19 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_16 = paddle._C_ops.stack(combine_17, 0) + del combine_17 + + # pd_op.reshape: (-1x4x7x4x7x256xf32) <- (-1x28x28x256xf32, 6xi64) + reshape_34 = paddle._C_ops.reshape(reshape_33, stack_16) + del reshape_33, stack_16 + + # pd_op.transpose: (-1x4x4x7x7x256xf32) <- (-1x4x7x4x7x256xf32) + transpose_14 = paddle._C_ops.transpose(reshape_34, [0, 1, 3, 2, 4, 5]) + del reshape_34 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_33 = [-1, 7, 7, 256] + + # pd_op.reshape: (-1x7x7x256xf32) <- (-1x4x4x7x7x256xf32, 4xi64) + reshape_35 = paddle._C_ops.reshape(transpose_14, full_int_array_33) + del transpose_14 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_34 = [-1, 49, 256] + + # pd_op.reshape: (-1x49x256xf32) <- (-1x7x7x256xf32, 3xi64) + reshape_36 = paddle._C_ops.reshape(reshape_35, full_int_array_34) + del reshape_35 + + # pd_op.shape64: (3xi64) <- (-1x49x256xf32) + shape64_14 = paddle._C_ops.shape64(reshape_36) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_20 = paddle._C_ops.slice( + shape64_14, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_14 + + # pd_op.matmul: (-1x49x768xf32) <- (-1x49x256xf32, 256x768xf32) + matmul_13 = paddle._C_ops.matmul(reshape_36, parameter_271, False, False) + del parameter_271, reshape_36 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, 768xf32) + add_17 = paddle._C_ops.add(matmul_13, parameter_270) + del matmul_13, parameter_270 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_18 = [slice_20, full_4, full_5, full_2, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_17 = paddle._C_ops.stack(combine_18, 0) + del combine_18 + + # pd_op.reshape: (-1x49x3x8x32xf32) <- (-1x49x768xf32, 5xi64) + reshape_37 = paddle._C_ops.reshape(add_17, stack_17) + del add_17, stack_17 + + # pd_op.transpose: (3x-1x8x49x32xf32) <- (-1x49x3x8x32xf32) + transpose_15 = paddle._C_ops.transpose(reshape_37, [2, 0, 3, 1, 4]) + del reshape_37 + + # pd_op.slice: (-1x8x49x32xf32) <- (3x-1x8x49x32xf32, 1xi64, 1xi64) + slice_21 = paddle._C_ops.slice( + transpose_15, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x8x49x32xf32) <- (3x-1x8x49x32xf32, 1xi64, 1xi64) + slice_22 = paddle._C_ops.slice( + transpose_15, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x8x49x32xf32) <- (3x-1x8x49x32xf32, 1xi64, 1xi64) + slice_23 = paddle._C_ops.slice( + transpose_15, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_15 + + # pd_op.scale: (-1x8x49x32xf32) <- (-1x8x49x32xf32, 1xf32) + scale_2 = paddle._C_ops.scale(slice_21, full_8, float("0"), True) + del slice_21 + + # pd_op.transpose: (-1x8x32x49xf32) <- (-1x8x49x32xf32) + transpose_16 = paddle._C_ops.transpose(slice_22, [0, 1, 3, 2]) + del slice_22 + + # pd_op.matmul: (-1x8x49x49xf32) <- (-1x8x49x32xf32, -1x8x32x49xf32) + matmul_14 = paddle._C_ops.matmul(scale_2, transpose_16, False, False) + del scale_2, transpose_16 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_38 = paddle._C_ops.reshape(data_5, full_int_array_7) + del data_5 + + # pd_op.index_select: (2401x8xf32) <- (169x8xf32, 2401xi64) + index_select_2 = paddle._C_ops.index_select(data_6, reshape_38, 0) + del data_6, reshape_38 + + # pd_op.reshape: (49x49x8xf32) <- (2401x8xf32, 3xi64) + reshape_39 = paddle._C_ops.reshape(index_select_2, full_int_array_8) + del index_select_2 + + # pd_op.transpose: (8x49x49xf32) <- (49x49x8xf32) + transpose_17 = paddle._C_ops.transpose(reshape_39, [2, 0, 1]) + del reshape_39 + + # pd_op.unsqueeze: (1x8x49x49xf32) <- (8x49x49xf32, 1xi64) + unsqueeze_6 = paddle._C_ops.unsqueeze(transpose_17, full_int_array_0) + del transpose_17 + + # pd_op.add: (-1x8x49x49xf32) <- (-1x8x49x49xf32, 1x8x49x49xf32) + add_18 = paddle._C_ops.add(matmul_14, unsqueeze_6) + del matmul_14, unsqueeze_6 + + # pd_op.softmax: (-1x8x49x49xf32) <- (-1x8x49x49xf32) + softmax_2 = paddle._C_ops.softmax(add_18, -1) + del add_18 + + # pd_op.matmul: (-1x8x49x32xf32) <- (-1x8x49x49xf32, -1x8x49x32xf32) + matmul_15 = paddle._C_ops.matmul(softmax_2, slice_23, False, False) + del slice_23, softmax_2 + + # pd_op.transpose: (-1x49x8x32xf32) <- (-1x8x49x32xf32) + transpose_18 = paddle._C_ops.transpose(matmul_15, [0, 2, 1, 3]) + del matmul_15 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_19 = [slice_20, full_4, full_20] + del slice_20 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_18 = paddle._C_ops.stack(combine_19, 0) + del combine_19 + + # pd_op.reshape: (-1x49x256xf32) <- (-1x49x8x32xf32, 3xi64) + reshape_40 = paddle._C_ops.reshape(transpose_18, stack_18) + del stack_18, transpose_18 + + # pd_op.matmul: (-1x49x256xf32) <- (-1x49x256xf32, 256x256xf32) + matmul_16 = paddle._C_ops.matmul(reshape_40, parameter_269, False, False) + del parameter_269, reshape_40 + + # pd_op.add: (-1x49x256xf32) <- (-1x49x256xf32, 256xf32) + add_19 = paddle._C_ops.add(matmul_16, parameter_268) + del matmul_16, parameter_268 + + # pd_op.reshape: (-1x7x7x256xf32) <- (-1x49x256xf32, 4xi64) + reshape_41 = paddle._C_ops.reshape(add_19, full_int_array_33) + del add_19 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_35 = [-1, 4, 4, 7, 7, 256] + + # pd_op.reshape: (-1x4x4x7x7x256xf32) <- (-1x7x7x256xf32, 6xi64) + reshape_42 = paddle._C_ops.reshape(reshape_41, full_int_array_35) + del reshape_41 + + # pd_op.transpose: (-1x4x7x4x7x256xf32) <- (-1x4x4x7x7x256xf32) + transpose_19 = paddle._C_ops.transpose(reshape_42, [0, 1, 3, 2, 4, 5]) + del reshape_42 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_36 = [-1, 28, 28, 256] + + # pd_op.reshape: (-1x28x28x256xf32) <- (-1x4x7x4x7x256xf32, 4xi64) + reshape_43 = paddle._C_ops.reshape(transpose_19, full_int_array_36) + del transpose_19 + + # pd_op.full: (xi64) <- () + full_21 = paddle._C_ops.full( + [], float("784"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_20 = [slice_17, full_21, full_20] + del slice_17 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_19 = paddle._C_ops.stack(combine_20, 0) + del combine_20 + + # pd_op.reshape: (-1x784x256xf32) <- (-1x28x28x256xf32, 3xi64) + reshape_44 = paddle._C_ops.reshape(reshape_43, stack_19) + del reshape_43, stack_19 + + # pd_op.add: (-1x784x256xf32) <- (-1x-1x256xf32, -1x784x256xf32) + add_20 = paddle._C_ops.add(matmul_12, reshape_44) + del matmul_12, reshape_44 + + # pd_op.layer_norm: (-1x784x256xf32, -1x784xf32, -1x784xf32) <- (-1x784x256xf32, 256xf32, 256xf32) + layer_norm_21, layer_norm_22, layer_norm_23 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_20, parameter_267, parameter_266, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_266, parameter_267 + + # pd_op.matmul: (-1x784x1024xf32) <- (-1x784x256xf32, 256x1024xf32) + matmul_17 = paddle._C_ops.matmul(layer_norm_21, parameter_265, False, False) + del layer_norm_21, parameter_265 + + # pd_op.add: (-1x784x1024xf32) <- (-1x784x1024xf32, 1024xf32) + add_21 = paddle._C_ops.add(matmul_17, parameter_264) + del matmul_17, parameter_264 + + # pd_op.gelu: (-1x784x1024xf32) <- (-1x784x1024xf32) + gelu_2 = paddle._C_ops.gelu(add_21, False) + del add_21 + + # pd_op.matmul: (-1x784x256xf32) <- (-1x784x1024xf32, 1024x256xf32) + matmul_18 = paddle._C_ops.matmul(gelu_2, parameter_263, False, False) + del gelu_2, parameter_263 + + # pd_op.add: (-1x784x256xf32) <- (-1x784x256xf32, 256xf32) + add_22 = paddle._C_ops.add(matmul_18, parameter_262) + del matmul_18, parameter_262 + + # pd_op.add: (-1x784x256xf32) <- (-1x784x256xf32, -1x784x256xf32) + add_23 = paddle._C_ops.add(add_20, add_22) + del add_20, add_22 + + # pd_op.shape64: (3xi64) <- (-1x784x256xf32) + shape64_15 = paddle._C_ops.shape64(add_23) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_24 = paddle._C_ops.slice( + shape64_15, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_15 + + # pd_op.layer_norm: (-1x784x256xf32, -1x784xf32, -1x784xf32) <- (-1x784x256xf32, 256xf32, 256xf32) + layer_norm_24, layer_norm_25, layer_norm_26 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_23, parameter_261, parameter_260, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_260, parameter_261 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_21 = [slice_24, full_19, full_19, full_20] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_20 = paddle._C_ops.stack(combine_21, 0) + del combine_21 + + # pd_op.reshape: (-1x28x28x256xf32) <- (-1x784x256xf32, 4xi64) + reshape_45 = paddle._C_ops.reshape(layer_norm_24, stack_20) + del layer_norm_24, stack_20 + + # pd_op.shape64: (4xi64) <- (-1x28x28x256xf32) + shape64_16 = paddle._C_ops.shape64(reshape_45) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_25 = paddle._C_ops.slice( + shape64_16, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_16 + + # pd_op.roll: (-1x28x28x256xf32) <- (-1x28x28x256xf32, 2xi64) + roll_2 = paddle._C_ops.roll(reshape_45, full_int_array_11, [1, 2]) + del reshape_45 + + # pd_op.shape64: (4xi64) <- (-1x28x28x256xf32) + shape64_17 = paddle._C_ops.shape64(roll_2) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_26 = paddle._C_ops.slice( + shape64_17, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_17 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_22 = [slice_26, full_6, full_3, full_6, full_3, full_20] + del slice_26 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_21 = paddle._C_ops.stack(combine_22, 0) + del combine_22 + + # pd_op.reshape: (-1x4x7x4x7x256xf32) <- (-1x28x28x256xf32, 6xi64) + reshape_46 = paddle._C_ops.reshape(roll_2, stack_21) + del roll_2, stack_21 + + # pd_op.transpose: (-1x4x4x7x7x256xf32) <- (-1x4x7x4x7x256xf32) + transpose_20 = paddle._C_ops.transpose(reshape_46, [0, 1, 3, 2, 4, 5]) + del reshape_46 + + # pd_op.reshape: (-1x7x7x256xf32) <- (-1x4x4x7x7x256xf32, 4xi64) + reshape_47 = paddle._C_ops.reshape(transpose_20, full_int_array_33) + del transpose_20 + + # pd_op.reshape: (-1x49x256xf32) <- (-1x7x7x256xf32, 3xi64) + reshape_48 = paddle._C_ops.reshape(reshape_47, full_int_array_34) + del full_int_array_34, reshape_47 + + # pd_op.full: (1x28x28x1xf32) <- () + full_22 = paddle._C_ops.full( + [1, 28, 28, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__20 = paddle._C_ops.set_value_( + full_22, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_22 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__21 = paddle._C_ops.set_value_( + set_value__20, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__20 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__22 = paddle._C_ops.set_value_( + set_value__21, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__21 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__23 = paddle._C_ops.set_value_( + set_value__22, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__22 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__24 = paddle._C_ops.set_value_( + set_value__23, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__23 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__25 = paddle._C_ops.set_value_( + set_value__24, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__24 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__26 = paddle._C_ops.set_value_( + set_value__25, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__25 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__27 = paddle._C_ops.set_value_( + set_value__26, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__26 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__1 = paddle._C_ops.set_value_( + set_value__27, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__27 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_37 = [1, 4, 7, 4, 7, 1] + + # pd_op.reshape: (1x4x7x4x7x1xf32) <- (1x28x28x1xf32, 6xi64) + reshape_49 = paddle._C_ops.reshape(set_value__1, full_int_array_37) + del full_int_array_37 + + # pd_op.transpose: (1x4x4x7x7x1xf32) <- (1x4x7x4x7x1xf32) + transpose_21 = paddle._C_ops.transpose(reshape_49, [0, 1, 3, 2, 4, 5]) + del reshape_49 + + # pd_op.reshape: (16x7x7x1xf32) <- (1x4x4x7x7x1xf32, 4xi64) + reshape_50 = paddle._C_ops.reshape(transpose_21, full_int_array_27) + del transpose_21 + + # pd_op.reshape: (16x49xf32) <- (16x7x7x1xf32, 2xi64) + reshape_51 = paddle._C_ops.reshape(reshape_50, full_int_array_28) + del reshape_50 + + # pd_op.unsqueeze: (16x1x49xf32) <- (16x49xf32, 1xi64) + unsqueeze_7 = paddle._C_ops.unsqueeze(reshape_51, full_int_array_1) + + # pd_op.unsqueeze: (16x49x1xf32) <- (16x49xf32, 1xi64) + unsqueeze_8 = paddle._C_ops.unsqueeze(reshape_51, full_int_array_5) + del reshape_51 + + # pd_op.subtract: (16x49x49xf32) <- (16x1x49xf32, 16x49x1xf32) + subtract_1 = paddle._C_ops.subtract(unsqueeze_7, unsqueeze_8) + del unsqueeze_7, unsqueeze_8 + + # pd_op.not_equal: (16x49x49xb) <- (16x49x49xf32, xf32) + not_equal_1 = paddle._C_ops.not_equal(subtract_1, full_11) + + # pd_op.full: (16x49x49xf32) <- () + full_23 = paddle._C_ops.full( + [16, 49, 49], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (16x49x49xf32) <- (16x49x49xb, 16x49x49xf32, 16x49x49xf32) + where_2 = paddle._C_ops.where(not_equal_1, full_23, subtract_1) + del full_23, not_equal_1, subtract_1 + + # pd_op.equal: (16x49x49xb) <- (16x49x49xf32, xf32) + equal_1 = paddle._C_ops.equal(where_2, full_11) + + # pd_op.full: (16x49x49xf32) <- () + full_24 = paddle._C_ops.full( + [16, 49, 49], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (16x49x49xf32) <- (16x49x49xb, 16x49x49xf32, 16x49x49xf32) + where_3 = paddle._C_ops.where(equal_1, full_24, where_2) + del equal_1, full_24, where_2 + + # pd_op.shape64: (3xi64) <- (-1x49x256xf32) + shape64_18 = paddle._C_ops.shape64(reshape_48) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_27 = paddle._C_ops.slice( + shape64_18, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_18 + + # pd_op.matmul: (-1x49x768xf32) <- (-1x49x256xf32, 256x768xf32) + matmul_19 = paddle._C_ops.matmul(reshape_48, parameter_259, False, False) + del parameter_259, reshape_48 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, 768xf32) + add_24 = paddle._C_ops.add(matmul_19, parameter_258) + del matmul_19, parameter_258 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_23 = [slice_27, full_4, full_5, full_2, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_22 = paddle._C_ops.stack(combine_23, 0) + del combine_23 + + # pd_op.reshape: (-1x49x3x8x32xf32) <- (-1x49x768xf32, 5xi64) + reshape_52 = paddle._C_ops.reshape(add_24, stack_22) + del add_24, stack_22 + + # pd_op.transpose: (3x-1x8x49x32xf32) <- (-1x49x3x8x32xf32) + transpose_22 = paddle._C_ops.transpose(reshape_52, [2, 0, 3, 1, 4]) + del reshape_52 + + # pd_op.slice: (-1x8x49x32xf32) <- (3x-1x8x49x32xf32, 1xi64, 1xi64) + slice_28 = paddle._C_ops.slice( + transpose_22, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x8x49x32xf32) <- (3x-1x8x49x32xf32, 1xi64, 1xi64) + slice_29 = paddle._C_ops.slice( + transpose_22, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x8x49x32xf32) <- (3x-1x8x49x32xf32, 1xi64, 1xi64) + slice_30 = paddle._C_ops.slice( + transpose_22, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_22 + + # pd_op.scale: (-1x8x49x32xf32) <- (-1x8x49x32xf32, 1xf32) + scale_3 = paddle._C_ops.scale(slice_28, full_8, float("0"), True) + del slice_28 + + # pd_op.transpose: (-1x8x32x49xf32) <- (-1x8x49x32xf32) + transpose_23 = paddle._C_ops.transpose(slice_29, [0, 1, 3, 2]) + del slice_29 + + # pd_op.matmul: (-1x8x49x49xf32) <- (-1x8x49x32xf32, -1x8x32x49xf32) + matmul_20 = paddle._C_ops.matmul(scale_3, transpose_23, False, False) + del scale_3, transpose_23 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_53 = paddle._C_ops.reshape(data_7, full_int_array_7) + del data_7 + + # pd_op.index_select: (2401x8xf32) <- (169x8xf32, 2401xi64) + index_select_3 = paddle._C_ops.index_select(data_8, reshape_53, 0) + del data_8, reshape_53 + + # pd_op.reshape: (49x49x8xf32) <- (2401x8xf32, 3xi64) + reshape_54 = paddle._C_ops.reshape(index_select_3, full_int_array_8) + del index_select_3 + + # pd_op.transpose: (8x49x49xf32) <- (49x49x8xf32) + transpose_24 = paddle._C_ops.transpose(reshape_54, [2, 0, 1]) + del reshape_54 + + # pd_op.unsqueeze: (1x8x49x49xf32) <- (8x49x49xf32, 1xi64) + unsqueeze_9 = paddle._C_ops.unsqueeze(transpose_24, full_int_array_0) + del transpose_24 + + # pd_op.add: (-1x8x49x49xf32) <- (-1x8x49x49xf32, 1x8x49x49xf32) + add_25 = paddle._C_ops.add(matmul_20, unsqueeze_9) + del matmul_20, unsqueeze_9 + + # pd_op.full: (xi64) <- () + full_25 = paddle._C_ops.full( + [], float("16"), paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_1 = paddle._C_ops.floor_divide(slice_27, full_25) + del full_25 + + # pd_op.full: (xi64) <- () + full_26 = paddle._C_ops.full( + [], float("16"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_24 = [floor_divide_1, full_26, full_2, full_4, full_4] + del floor_divide_1 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_23 = paddle._C_ops.stack(combine_24, 0) + del combine_24 + + # pd_op.reshape: (-1x16x8x49x49xf32) <- (-1x8x49x49xf32, 5xi64) + reshape_55 = paddle._C_ops.reshape(add_25, stack_23) + del add_25, stack_23 + + # pd_op.unsqueeze: (16x1x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_10 = paddle._C_ops.unsqueeze(where_3, full_int_array_1) + del where_3 + + # pd_op.unsqueeze: (1x16x1x49x49xf32) <- (16x1x49x49xf32, 1xi64) + unsqueeze_11 = paddle._C_ops.unsqueeze(unsqueeze_10, full_int_array_0) + del unsqueeze_10 + + # pd_op.add: (-1x16x8x49x49xf32) <- (-1x16x8x49x49xf32, 1x16x1x49x49xf32) + add_26 = paddle._C_ops.add(reshape_55, unsqueeze_11) + del reshape_55, unsqueeze_11 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_25 = [slice_27, full_2, full_4, full_4] + del full_2 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_24 = paddle._C_ops.stack(combine_25, 0) + del combine_25 + + # pd_op.reshape: (-1x8x49x49xf32) <- (-1x16x8x49x49xf32, 4xi64) + reshape_56 = paddle._C_ops.reshape(add_26, stack_24) + del add_26, stack_24 + + # pd_op.softmax: (-1x8x49x49xf32) <- (-1x8x49x49xf32) + softmax_3 = paddle._C_ops.softmax(reshape_56, -1) + del reshape_56 + + # pd_op.matmul: (-1x8x49x32xf32) <- (-1x8x49x49xf32, -1x8x49x32xf32) + matmul_21 = paddle._C_ops.matmul(softmax_3, slice_30, False, False) + del slice_30, softmax_3 + + # pd_op.transpose: (-1x49x8x32xf32) <- (-1x8x49x32xf32) + transpose_25 = paddle._C_ops.transpose(matmul_21, [0, 2, 1, 3]) + del matmul_21 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_26 = [slice_27, full_4, full_20] + del slice_27 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_25 = paddle._C_ops.stack(combine_26, 0) + del combine_26 + + # pd_op.reshape: (-1x49x256xf32) <- (-1x49x8x32xf32, 3xi64) + reshape_57 = paddle._C_ops.reshape(transpose_25, stack_25) + del stack_25, transpose_25 + + # pd_op.matmul: (-1x49x256xf32) <- (-1x49x256xf32, 256x256xf32) + matmul_22 = paddle._C_ops.matmul(reshape_57, parameter_257, False, False) + del parameter_257, reshape_57 + + # pd_op.add: (-1x49x256xf32) <- (-1x49x256xf32, 256xf32) + add_27 = paddle._C_ops.add(matmul_22, parameter_256) + del matmul_22, parameter_256 + + # pd_op.reshape: (-1x7x7x256xf32) <- (-1x49x256xf32, 4xi64) + reshape_58 = paddle._C_ops.reshape(add_27, full_int_array_33) + del add_27, full_int_array_33 + + # pd_op.reshape: (-1x4x4x7x7x256xf32) <- (-1x7x7x256xf32, 6xi64) + reshape_59 = paddle._C_ops.reshape(reshape_58, full_int_array_35) + del full_int_array_35, reshape_58 + + # pd_op.transpose: (-1x4x7x4x7x256xf32) <- (-1x4x4x7x7x256xf32) + transpose_26 = paddle._C_ops.transpose(reshape_59, [0, 1, 3, 2, 4, 5]) + del reshape_59 + + # pd_op.reshape: (-1x28x28x256xf32) <- (-1x4x7x4x7x256xf32, 4xi64) + reshape_60 = paddle._C_ops.reshape(transpose_26, full_int_array_36) + del full_int_array_36, transpose_26 + + # pd_op.roll: (-1x28x28x256xf32) <- (-1x28x28x256xf32, 2xi64) + roll_3 = paddle._C_ops.roll(reshape_60, full_int_array_29, [1, 2]) + del reshape_60 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_27 = [slice_24, full_21, full_20] + del full_21, slice_24 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_26 = paddle._C_ops.stack(combine_27, 0) + del combine_27 + + # pd_op.reshape: (-1x784x256xf32) <- (-1x28x28x256xf32, 3xi64) + reshape_61 = paddle._C_ops.reshape(roll_3, stack_26) + del roll_3, stack_26 + + # pd_op.add: (-1x784x256xf32) <- (-1x784x256xf32, -1x784x256xf32) + add_28 = paddle._C_ops.add(add_23, reshape_61) + del add_23, reshape_61 + + # pd_op.layer_norm: (-1x784x256xf32, -1x784xf32, -1x784xf32) <- (-1x784x256xf32, 256xf32, 256xf32) + layer_norm_27, layer_norm_28, layer_norm_29 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_28, parameter_255, parameter_254, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_254, parameter_255 + + # pd_op.matmul: (-1x784x1024xf32) <- (-1x784x256xf32, 256x1024xf32) + matmul_23 = paddle._C_ops.matmul(layer_norm_27, parameter_253, False, False) + del layer_norm_27, parameter_253 + + # pd_op.add: (-1x784x1024xf32) <- (-1x784x1024xf32, 1024xf32) + add_29 = paddle._C_ops.add(matmul_23, parameter_252) + del matmul_23, parameter_252 + + # pd_op.gelu: (-1x784x1024xf32) <- (-1x784x1024xf32) + gelu_3 = paddle._C_ops.gelu(add_29, False) + del add_29 + + # pd_op.matmul: (-1x784x256xf32) <- (-1x784x1024xf32, 1024x256xf32) + matmul_24 = paddle._C_ops.matmul(gelu_3, parameter_251, False, False) + del gelu_3, parameter_251 + + # pd_op.add: (-1x784x256xf32) <- (-1x784x256xf32, 256xf32) + add_30 = paddle._C_ops.add(matmul_24, parameter_250) + del matmul_24, parameter_250 + + # pd_op.add: (-1x784x256xf32) <- (-1x784x256xf32, -1x784x256xf32) + add_31 = paddle._C_ops.add(add_28, add_30) + del add_28, add_30 + + # pd_op.shape64: (3xi64) <- (-1x784x256xf32) + shape64_19 = paddle._C_ops.shape64(add_31) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_31 = paddle._C_ops.slice( + shape64_19, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_19 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_28 = [slice_31, full_19, full_19, full_20] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_27 = paddle._C_ops.stack(combine_28, 0) + del combine_28 + + # pd_op.reshape: (-1x28x28x256xf32) <- (-1x784x256xf32, 4xi64) + reshape_62 = paddle._C_ops.reshape(add_31, stack_27) + del add_31, stack_27 + + # pd_op.strided_slice: (-1x14x14x256xf32) <- (-1x28x28x256xf32, 2xi64, 2xi64, 2xi64) + strided_slice_4 = paddle._C_ops.strided_slice( + reshape_62, [1, 2], full_int_array_12, full_int_array_25, full_int_array_30 + ) + + # pd_op.strided_slice: (-1x14x14x256xf32) <- (-1x28x28x256xf32, 2xi64, 2xi64, 2xi64) + strided_slice_5 = paddle._C_ops.strided_slice( + reshape_62, [1, 2], full_int_array_31, full_int_array_25, full_int_array_30 + ) + + # pd_op.strided_slice: (-1x14x14x256xf32) <- (-1x28x28x256xf32, 2xi64, 2xi64, 2xi64) + strided_slice_6 = paddle._C_ops.strided_slice( + reshape_62, [1, 2], full_int_array_32, full_int_array_25, full_int_array_30 + ) + + # pd_op.strided_slice: (-1x14x14x256xf32) <- (-1x28x28x256xf32, 2xi64, 2xi64, 2xi64) + strided_slice_7 = paddle._C_ops.strided_slice( + reshape_62, [1, 2], full_int_array_14, full_int_array_25, full_int_array_30 + ) + + # pd_op.shape64: (4xi64) <- (-1x28x28x256xf32) + shape64_20 = paddle._C_ops.shape64(reshape_62) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_32 = paddle._C_ops.slice( + shape64_20, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_20 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_29 = [slice_32, full_19, full_19, full_20] + del full_19, full_20, slice_32 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_28 = paddle._C_ops.stack(combine_29, 0) + del combine_29 + + # pd_op.reshape: (-1x28x28x256xf32) <- (-1x28x28x256xf32, 4xi64) + reshape_63 = paddle._C_ops.reshape(reshape_62, stack_28) + del reshape_62, stack_28 + + # builtin.combine: ([-1x14x14x256xf32, -1x14x14x256xf32, -1x14x14x256xf32, -1x14x14x256xf32]) <- (-1x14x14x256xf32, -1x14x14x256xf32, -1x14x14x256xf32, -1x14x14x256xf32) + combine_30 = [ + strided_slice_4, + strided_slice_5, + strided_slice_6, + strided_slice_7, + ] + del strided_slice_4, strided_slice_5, strided_slice_6, strided_slice_7 + + # pd_op.concat: (-1x14x14x1024xf32) <- ([-1x14x14x256xf32, -1x14x14x256xf32, -1x14x14x256xf32, -1x14x14x256xf32], 1xi32) + concat_1 = paddle._C_ops.concat(combine_30, full_16) + del combine_30 + + # pd_op.full: (xi64) <- () + full_27 = paddle._C_ops.full( + [], float("1024"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_31 = [slice_31, full_17, full_27] + del slice_31 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_29 = paddle._C_ops.stack(combine_31, 0) + del combine_31 + + # pd_op.reshape: (-1x-1x1024xf32) <- (-1x14x14x1024xf32, 3xi64) + reshape_64 = paddle._C_ops.reshape(concat_1, stack_29) + del concat_1, stack_29 + + # pd_op.layer_norm: (-1x-1x1024xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x1024xf32, 1024xf32, 1024xf32) + layer_norm_30, layer_norm_31, layer_norm_32 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + reshape_64, parameter_249, parameter_248, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_248, parameter_249, reshape_64 + + # pd_op.matmul: (-1x-1x512xf32) <- (-1x-1x1024xf32, 1024x512xf32) + matmul_25 = paddle._C_ops.matmul(layer_norm_30, parameter_247, False, False) + del layer_norm_30, parameter_247 + + # pd_op.shape64: (3xi64) <- (-1x-1x512xf32) + shape64_21 = paddle._C_ops.shape64(matmul_25) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_33 = paddle._C_ops.slice( + shape64_21, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_21 + + # pd_op.shape64: (3xi64) <- (-1x-1x512xf32) + shape64_22 = paddle._C_ops.shape64(matmul_25) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_34 = paddle._C_ops.slice( + shape64_22, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + del shape64_22 + + # pd_op.layer_norm: (-1x-1x512xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x512xf32, 512xf32, 512xf32) + layer_norm_33, layer_norm_34, layer_norm_35 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + matmul_25, parameter_246, parameter_245, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_245, parameter_246 + + # pd_op.full: (xi64) <- () + full_28 = paddle._C_ops.full( + [], float("14"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_32 = [slice_33, full_28, full_28, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_30 = paddle._C_ops.stack(combine_32, 0) + del combine_32 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x-1x512xf32, 4xi64) + reshape_65 = paddle._C_ops.reshape(layer_norm_33, stack_30) + del layer_norm_33, stack_30 + + # pd_op.shape64: (4xi64) <- (-1x14x14x512xf32) + shape64_23 = paddle._C_ops.shape64(reshape_65) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_35 = paddle._C_ops.slice( + shape64_23, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_23 + + # pd_op.full: (xi64) <- () + full_29 = paddle._C_ops.full( + [], float("2"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_33 = [slice_35, full_29, full_3, full_29, full_3, full_18] + del slice_35 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_31 = paddle._C_ops.stack(combine_33, 0) + del combine_33 + + # pd_op.reshape: (-1x2x7x2x7x512xf32) <- (-1x14x14x512xf32, 6xi64) + reshape_66 = paddle._C_ops.reshape(reshape_65, stack_31) + del reshape_65, stack_31 + + # pd_op.transpose: (-1x2x2x7x7x512xf32) <- (-1x2x7x2x7x512xf32) + transpose_27 = paddle._C_ops.transpose(reshape_66, [0, 1, 3, 2, 4, 5]) + del reshape_66 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_38 = [-1, 7, 7, 512] + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x2x2x7x7x512xf32, 4xi64) + reshape_67 = paddle._C_ops.reshape(transpose_27, full_int_array_38) + del transpose_27 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_39 = [-1, 49, 512] + + # pd_op.reshape: (-1x49x512xf32) <- (-1x7x7x512xf32, 3xi64) + reshape_68 = paddle._C_ops.reshape(reshape_67, full_int_array_39) + del reshape_67 + + # pd_op.shape64: (3xi64) <- (-1x49x512xf32) + shape64_24 = paddle._C_ops.shape64(reshape_68) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_36 = paddle._C_ops.slice( + shape64_24, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_24 + + # pd_op.matmul: (-1x49x1536xf32) <- (-1x49x512xf32, 512x1536xf32) + matmul_26 = paddle._C_ops.matmul(reshape_68, parameter_244, False, False) + del parameter_244, reshape_68 + + # pd_op.add: (-1x49x1536xf32) <- (-1x49x1536xf32, 1536xf32) + add_32 = paddle._C_ops.add(matmul_26, parameter_243) + del matmul_26, parameter_243 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_34 = [slice_36, full_4, full_5, full_26, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_32 = paddle._C_ops.stack(combine_34, 0) + del combine_34 + + # pd_op.reshape: (-1x49x3x16x32xf32) <- (-1x49x1536xf32, 5xi64) + reshape_69 = paddle._C_ops.reshape(add_32, stack_32) + del add_32, stack_32 + + # pd_op.transpose: (3x-1x16x49x32xf32) <- (-1x49x3x16x32xf32) + transpose_28 = paddle._C_ops.transpose(reshape_69, [2, 0, 3, 1, 4]) + del reshape_69 + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_37 = paddle._C_ops.slice( + transpose_28, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_38 = paddle._C_ops.slice( + transpose_28, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_39 = paddle._C_ops.slice( + transpose_28, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_28 + + # pd_op.scale: (-1x16x49x32xf32) <- (-1x16x49x32xf32, 1xf32) + scale_4 = paddle._C_ops.scale(slice_37, full_8, float("0"), True) + del slice_37 + + # pd_op.transpose: (-1x16x32x49xf32) <- (-1x16x49x32xf32) + transpose_29 = paddle._C_ops.transpose(slice_38, [0, 1, 3, 2]) + del slice_38 + + # pd_op.matmul: (-1x16x49x49xf32) <- (-1x16x49x32xf32, -1x16x32x49xf32) + matmul_27 = paddle._C_ops.matmul(scale_4, transpose_29, False, False) + del scale_4, transpose_29 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_70 = paddle._C_ops.reshape(data_9, full_int_array_7) + del data_9 + + # pd_op.index_select: (2401x16xf32) <- (169x16xf32, 2401xi64) + index_select_4 = paddle._C_ops.index_select(data_10, reshape_70, 0) + del data_10, reshape_70 + + # pd_op.reshape: (49x49x16xf32) <- (2401x16xf32, 3xi64) + reshape_71 = paddle._C_ops.reshape(index_select_4, full_int_array_8) + del index_select_4 + + # pd_op.transpose: (16x49x49xf32) <- (49x49x16xf32) + transpose_30 = paddle._C_ops.transpose(reshape_71, [2, 0, 1]) + del reshape_71 + + # pd_op.unsqueeze: (1x16x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_12 = paddle._C_ops.unsqueeze(transpose_30, full_int_array_0) + del transpose_30 + + # pd_op.add: (-1x16x49x49xf32) <- (-1x16x49x49xf32, 1x16x49x49xf32) + add_33 = paddle._C_ops.add(matmul_27, unsqueeze_12) + del matmul_27, unsqueeze_12 + + # pd_op.softmax: (-1x16x49x49xf32) <- (-1x16x49x49xf32) + softmax_4 = paddle._C_ops.softmax(add_33, -1) + del add_33 + + # pd_op.matmul: (-1x16x49x32xf32) <- (-1x16x49x49xf32, -1x16x49x32xf32) + matmul_28 = paddle._C_ops.matmul(softmax_4, slice_39, False, False) + del slice_39, softmax_4 + + # pd_op.transpose: (-1x49x16x32xf32) <- (-1x16x49x32xf32) + transpose_31 = paddle._C_ops.transpose(matmul_28, [0, 2, 1, 3]) + del matmul_28 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_35 = [slice_36, full_4, full_18] + del slice_36 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_33 = paddle._C_ops.stack(combine_35, 0) + del combine_35 + + # pd_op.reshape: (-1x49x512xf32) <- (-1x49x16x32xf32, 3xi64) + reshape_72 = paddle._C_ops.reshape(transpose_31, stack_33) + del stack_33, transpose_31 + + # pd_op.matmul: (-1x49x512xf32) <- (-1x49x512xf32, 512x512xf32) + matmul_29 = paddle._C_ops.matmul(reshape_72, parameter_242, False, False) + del parameter_242, reshape_72 + + # pd_op.add: (-1x49x512xf32) <- (-1x49x512xf32, 512xf32) + add_34 = paddle._C_ops.add(matmul_29, parameter_241) + del matmul_29, parameter_241 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x49x512xf32, 4xi64) + reshape_73 = paddle._C_ops.reshape(add_34, full_int_array_38) + del add_34 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_40 = [-1, 2, 2, 7, 7, 512] + + # pd_op.reshape: (-1x2x2x7x7x512xf32) <- (-1x7x7x512xf32, 6xi64) + reshape_74 = paddle._C_ops.reshape(reshape_73, full_int_array_40) + del reshape_73 + + # pd_op.transpose: (-1x2x7x2x7x512xf32) <- (-1x2x2x7x7x512xf32) + transpose_32 = paddle._C_ops.transpose(reshape_74, [0, 1, 3, 2, 4, 5]) + del reshape_74 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_41 = [-1, 14, 14, 512] + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x2x7x2x7x512xf32, 4xi64) + reshape_75 = paddle._C_ops.reshape(transpose_32, full_int_array_41) + del transpose_32 + + # pd_op.full: (xi64) <- () + full_30 = paddle._C_ops.full( + [], float("196"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_36 = [slice_33, full_30, full_18] + del slice_33 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_34 = paddle._C_ops.stack(combine_36, 0) + del combine_36 + + # pd_op.reshape: (-1x196x512xf32) <- (-1x14x14x512xf32, 3xi64) + reshape_76 = paddle._C_ops.reshape(reshape_75, stack_34) + del reshape_75, stack_34 + + # pd_op.add: (-1x196x512xf32) <- (-1x-1x512xf32, -1x196x512xf32) + add_35 = paddle._C_ops.add(matmul_25, reshape_76) + del matmul_25, reshape_76 + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_36, layer_norm_37, layer_norm_38 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_35, parameter_240, parameter_239, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_239, parameter_240 + + # pd_op.matmul: (-1x196x2048xf32) <- (-1x196x512xf32, 512x2048xf32) + matmul_30 = paddle._C_ops.matmul(layer_norm_36, parameter_238, False, False) + del layer_norm_36, parameter_238 + + # pd_op.add: (-1x196x2048xf32) <- (-1x196x2048xf32, 2048xf32) + add_36 = paddle._C_ops.add(matmul_30, parameter_237) + del matmul_30, parameter_237 + + # pd_op.gelu: (-1x196x2048xf32) <- (-1x196x2048xf32) + gelu_4 = paddle._C_ops.gelu(add_36, False) + del add_36 + + # pd_op.matmul: (-1x196x512xf32) <- (-1x196x2048xf32, 2048x512xf32) + matmul_31 = paddle._C_ops.matmul(gelu_4, parameter_236, False, False) + del gelu_4, parameter_236 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, 512xf32) + add_37 = paddle._C_ops.add(matmul_31, parameter_235) + del matmul_31, parameter_235 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_38 = paddle._C_ops.add(add_35, add_37) + del add_35, add_37 + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_25 = paddle._C_ops.shape64(add_38) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_40 = paddle._C_ops.slice( + shape64_25, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_25 + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_39, layer_norm_40, layer_norm_41 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_38, parameter_234, parameter_233, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_233, parameter_234 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_37 = [slice_40, full_28, full_28, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_35 = paddle._C_ops.stack(combine_37, 0) + del combine_37 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x196x512xf32, 4xi64) + reshape_77 = paddle._C_ops.reshape(layer_norm_39, stack_35) + del layer_norm_39, stack_35 + + # pd_op.shape64: (4xi64) <- (-1x14x14x512xf32) + shape64_26 = paddle._C_ops.shape64(reshape_77) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_41 = paddle._C_ops.slice( + shape64_26, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_26 + + # pd_op.roll: (-1x14x14x512xf32) <- (-1x14x14x512xf32, 2xi64) + roll_4 = paddle._C_ops.roll(reshape_77, full_int_array_11, [1, 2]) + del reshape_77 + + # pd_op.shape64: (4xi64) <- (-1x14x14x512xf32) + shape64_27 = paddle._C_ops.shape64(roll_4) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_42 = paddle._C_ops.slice( + shape64_27, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_27 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_38 = [slice_42, full_29, full_3, full_29, full_3, full_18] + del slice_42 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_36 = paddle._C_ops.stack(combine_38, 0) + del combine_38 + + # pd_op.reshape: (-1x2x7x2x7x512xf32) <- (-1x14x14x512xf32, 6xi64) + reshape_78 = paddle._C_ops.reshape(roll_4, stack_36) + del roll_4, stack_36 + + # pd_op.transpose: (-1x2x2x7x7x512xf32) <- (-1x2x7x2x7x512xf32) + transpose_33 = paddle._C_ops.transpose(reshape_78, [0, 1, 3, 2, 4, 5]) + del reshape_78 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x2x2x7x7x512xf32, 4xi64) + reshape_79 = paddle._C_ops.reshape(transpose_33, full_int_array_38) + del transpose_33 + + # pd_op.reshape: (-1x49x512xf32) <- (-1x7x7x512xf32, 3xi64) + reshape_80 = paddle._C_ops.reshape(reshape_79, full_int_array_39) + del reshape_79 + + # pd_op.full: (1x14x14x1xf32) <- () + full_31 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__28 = paddle._C_ops.set_value_( + full_31, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_31 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__29 = paddle._C_ops.set_value_( + set_value__28, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__28 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__30 = paddle._C_ops.set_value_( + set_value__29, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__29 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__31 = paddle._C_ops.set_value_( + set_value__30, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__30 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__32 = paddle._C_ops.set_value_( + set_value__31, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__31 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__33 = paddle._C_ops.set_value_( + set_value__32, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__32 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__34 = paddle._C_ops.set_value_( + set_value__33, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__33 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__35 = paddle._C_ops.set_value_( + set_value__34, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__34 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__2 = paddle._C_ops.set_value_( + set_value__35, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__35 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_42 = [1, 2, 7, 2, 7, 1] + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_81 = paddle._C_ops.reshape(set_value__2, full_int_array_42) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_34 = paddle._C_ops.transpose(reshape_81, [0, 1, 3, 2, 4, 5]) + del reshape_81 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_82 = paddle._C_ops.reshape(transpose_34, full_int_array_27) + del transpose_34 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_83 = paddle._C_ops.reshape(reshape_82, full_int_array_28) + del reshape_82 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_13 = paddle._C_ops.unsqueeze(reshape_83, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_14 = paddle._C_ops.unsqueeze(reshape_83, full_int_array_5) + del reshape_83 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_2 = paddle._C_ops.subtract(unsqueeze_13, unsqueeze_14) + del unsqueeze_13, unsqueeze_14 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_2 = paddle._C_ops.not_equal(subtract_2, full_11) + + # pd_op.full: (4x49x49xf32) <- () + full_32 = paddle._C_ops.full( + [4, 49, 49], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_4 = paddle._C_ops.where(not_equal_2, full_32, subtract_2) + del not_equal_2, subtract_2 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_2 = paddle._C_ops.equal(where_4, full_11) + + # pd_op.full: (4x49x49xf32) <- () + full_33 = paddle._C_ops.full( + [4, 49, 49], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_5 = paddle._C_ops.where(equal_2, full_33, where_4) + del equal_2, where_4 + + # pd_op.shape64: (3xi64) <- (-1x49x512xf32) + shape64_28 = paddle._C_ops.shape64(reshape_80) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_43 = paddle._C_ops.slice( + shape64_28, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_28 + + # pd_op.matmul: (-1x49x1536xf32) <- (-1x49x512xf32, 512x1536xf32) + matmul_32 = paddle._C_ops.matmul(reshape_80, parameter_232, False, False) + del parameter_232, reshape_80 + + # pd_op.add: (-1x49x1536xf32) <- (-1x49x1536xf32, 1536xf32) + add_39 = paddle._C_ops.add(matmul_32, parameter_231) + del matmul_32, parameter_231 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_39 = [slice_43, full_4, full_5, full_26, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_37 = paddle._C_ops.stack(combine_39, 0) + del combine_39 + + # pd_op.reshape: (-1x49x3x16x32xf32) <- (-1x49x1536xf32, 5xi64) + reshape_84 = paddle._C_ops.reshape(add_39, stack_37) + del add_39, stack_37 + + # pd_op.transpose: (3x-1x16x49x32xf32) <- (-1x49x3x16x32xf32) + transpose_35 = paddle._C_ops.transpose(reshape_84, [2, 0, 3, 1, 4]) + del reshape_84 + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_44 = paddle._C_ops.slice( + transpose_35, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_45 = paddle._C_ops.slice( + transpose_35, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_46 = paddle._C_ops.slice( + transpose_35, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_35 + + # pd_op.scale: (-1x16x49x32xf32) <- (-1x16x49x32xf32, 1xf32) + scale_5 = paddle._C_ops.scale(slice_44, full_8, float("0"), True) + del slice_44 + + # pd_op.transpose: (-1x16x32x49xf32) <- (-1x16x49x32xf32) + transpose_36 = paddle._C_ops.transpose(slice_45, [0, 1, 3, 2]) + del slice_45 + + # pd_op.matmul: (-1x16x49x49xf32) <- (-1x16x49x32xf32, -1x16x32x49xf32) + matmul_33 = paddle._C_ops.matmul(scale_5, transpose_36, False, False) + del scale_5, transpose_36 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_85 = paddle._C_ops.reshape(data_11, full_int_array_7) + del data_11 + + # pd_op.index_select: (2401x16xf32) <- (169x16xf32, 2401xi64) + index_select_5 = paddle._C_ops.index_select(data_12, reshape_85, 0) + del data_12, reshape_85 + + # pd_op.reshape: (49x49x16xf32) <- (2401x16xf32, 3xi64) + reshape_86 = paddle._C_ops.reshape(index_select_5, full_int_array_8) + del index_select_5 + + # pd_op.transpose: (16x49x49xf32) <- (49x49x16xf32) + transpose_37 = paddle._C_ops.transpose(reshape_86, [2, 0, 1]) + del reshape_86 + + # pd_op.unsqueeze: (1x16x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_15 = paddle._C_ops.unsqueeze(transpose_37, full_int_array_0) + del transpose_37 + + # pd_op.add: (-1x16x49x49xf32) <- (-1x16x49x49xf32, 1x16x49x49xf32) + add_40 = paddle._C_ops.add(matmul_33, unsqueeze_15) + del matmul_33, unsqueeze_15 + + # pd_op.full: (xi64) <- () + full_34 = paddle._C_ops.full( + [], float("4"), paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_2 = paddle._C_ops.floor_divide(slice_43, full_34) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_40 = [floor_divide_2, full_6, full_26, full_4, full_4] + del floor_divide_2 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_38 = paddle._C_ops.stack(combine_40, 0) + del combine_40 + + # pd_op.reshape: (-1x4x16x49x49xf32) <- (-1x16x49x49xf32, 5xi64) + reshape_87 = paddle._C_ops.reshape(add_40, stack_38) + del add_40, stack_38 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_16 = paddle._C_ops.unsqueeze(where_5, full_int_array_1) + del where_5 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_17 = paddle._C_ops.unsqueeze(unsqueeze_16, full_int_array_0) + del unsqueeze_16 + + # pd_op.add: (-1x4x16x49x49xf32) <- (-1x4x16x49x49xf32, 1x4x1x49x49xf32) + add_41 = paddle._C_ops.add(reshape_87, unsqueeze_17) + del reshape_87, unsqueeze_17 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_41 = [slice_43, full_26, full_4, full_4] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_39 = paddle._C_ops.stack(combine_41, 0) + del combine_41 + + # pd_op.reshape: (-1x16x49x49xf32) <- (-1x4x16x49x49xf32, 4xi64) + reshape_88 = paddle._C_ops.reshape(add_41, stack_39) + del add_41, stack_39 + + # pd_op.softmax: (-1x16x49x49xf32) <- (-1x16x49x49xf32) + softmax_5 = paddle._C_ops.softmax(reshape_88, -1) + del reshape_88 + + # pd_op.matmul: (-1x16x49x32xf32) <- (-1x16x49x49xf32, -1x16x49x32xf32) + matmul_34 = paddle._C_ops.matmul(softmax_5, slice_46, False, False) + del slice_46, softmax_5 + + # pd_op.transpose: (-1x49x16x32xf32) <- (-1x16x49x32xf32) + transpose_38 = paddle._C_ops.transpose(matmul_34, [0, 2, 1, 3]) + del matmul_34 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_42 = [slice_43, full_4, full_18] + del slice_43 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_40 = paddle._C_ops.stack(combine_42, 0) + del combine_42 + + # pd_op.reshape: (-1x49x512xf32) <- (-1x49x16x32xf32, 3xi64) + reshape_89 = paddle._C_ops.reshape(transpose_38, stack_40) + del stack_40, transpose_38 + + # pd_op.matmul: (-1x49x512xf32) <- (-1x49x512xf32, 512x512xf32) + matmul_35 = paddle._C_ops.matmul(reshape_89, parameter_230, False, False) + del parameter_230, reshape_89 + + # pd_op.add: (-1x49x512xf32) <- (-1x49x512xf32, 512xf32) + add_42 = paddle._C_ops.add(matmul_35, parameter_229) + del matmul_35, parameter_229 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x49x512xf32, 4xi64) + reshape_90 = paddle._C_ops.reshape(add_42, full_int_array_38) + del add_42 + + # pd_op.reshape: (-1x2x2x7x7x512xf32) <- (-1x7x7x512xf32, 6xi64) + reshape_91 = paddle._C_ops.reshape(reshape_90, full_int_array_40) + del reshape_90 + + # pd_op.transpose: (-1x2x7x2x7x512xf32) <- (-1x2x2x7x7x512xf32) + transpose_39 = paddle._C_ops.transpose(reshape_91, [0, 1, 3, 2, 4, 5]) + del reshape_91 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x2x7x2x7x512xf32, 4xi64) + reshape_92 = paddle._C_ops.reshape(transpose_39, full_int_array_41) + del transpose_39 + + # pd_op.roll: (-1x14x14x512xf32) <- (-1x14x14x512xf32, 2xi64) + roll_5 = paddle._C_ops.roll(reshape_92, full_int_array_29, [1, 2]) + del reshape_92 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_43 = [slice_40, full_30, full_18] + del slice_40 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_41 = paddle._C_ops.stack(combine_43, 0) + del combine_43 + + # pd_op.reshape: (-1x196x512xf32) <- (-1x14x14x512xf32, 3xi64) + reshape_93 = paddle._C_ops.reshape(roll_5, stack_41) + del roll_5, stack_41 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_43 = paddle._C_ops.add(add_38, reshape_93) + del add_38, reshape_93 + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_42, layer_norm_43, layer_norm_44 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_43, parameter_228, parameter_227, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_227, parameter_228 + + # pd_op.matmul: (-1x196x2048xf32) <- (-1x196x512xf32, 512x2048xf32) + matmul_36 = paddle._C_ops.matmul(layer_norm_42, parameter_226, False, False) + del layer_norm_42, parameter_226 + + # pd_op.add: (-1x196x2048xf32) <- (-1x196x2048xf32, 2048xf32) + add_44 = paddle._C_ops.add(matmul_36, parameter_225) + del matmul_36, parameter_225 + + # pd_op.gelu: (-1x196x2048xf32) <- (-1x196x2048xf32) + gelu_5 = paddle._C_ops.gelu(add_44, False) + del add_44 + + # pd_op.matmul: (-1x196x512xf32) <- (-1x196x2048xf32, 2048x512xf32) + matmul_37 = paddle._C_ops.matmul(gelu_5, parameter_224, False, False) + del gelu_5, parameter_224 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, 512xf32) + add_45 = paddle._C_ops.add(matmul_37, parameter_223) + del matmul_37, parameter_223 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_46 = paddle._C_ops.add(add_43, add_45) + del add_43, add_45 + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_29 = paddle._C_ops.shape64(add_46) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_47 = paddle._C_ops.slice( + shape64_29, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_29 + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_45, layer_norm_46, layer_norm_47 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_46, parameter_222, parameter_221, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_221, parameter_222 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_44 = [slice_47, full_28, full_28, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_42 = paddle._C_ops.stack(combine_44, 0) + del combine_44 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x196x512xf32, 4xi64) + reshape_94 = paddle._C_ops.reshape(layer_norm_45, stack_42) + del layer_norm_45, stack_42 + + # pd_op.shape64: (4xi64) <- (-1x14x14x512xf32) + shape64_30 = paddle._C_ops.shape64(reshape_94) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_48 = paddle._C_ops.slice( + shape64_30, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_30 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_45 = [slice_48, full_29, full_3, full_29, full_3, full_18] + del slice_48 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_43 = paddle._C_ops.stack(combine_45, 0) + del combine_45 + + # pd_op.reshape: (-1x2x7x2x7x512xf32) <- (-1x14x14x512xf32, 6xi64) + reshape_95 = paddle._C_ops.reshape(reshape_94, stack_43) + del reshape_94, stack_43 + + # pd_op.transpose: (-1x2x2x7x7x512xf32) <- (-1x2x7x2x7x512xf32) + transpose_40 = paddle._C_ops.transpose(reshape_95, [0, 1, 3, 2, 4, 5]) + del reshape_95 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x2x2x7x7x512xf32, 4xi64) + reshape_96 = paddle._C_ops.reshape(transpose_40, full_int_array_38) + del transpose_40 + + # pd_op.reshape: (-1x49x512xf32) <- (-1x7x7x512xf32, 3xi64) + reshape_97 = paddle._C_ops.reshape(reshape_96, full_int_array_39) + del reshape_96 + + # pd_op.shape64: (3xi64) <- (-1x49x512xf32) + shape64_31 = paddle._C_ops.shape64(reshape_97) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_49 = paddle._C_ops.slice( + shape64_31, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_31 + + # pd_op.matmul: (-1x49x1536xf32) <- (-1x49x512xf32, 512x1536xf32) + matmul_38 = paddle._C_ops.matmul(reshape_97, parameter_220, False, False) + del parameter_220, reshape_97 + + # pd_op.add: (-1x49x1536xf32) <- (-1x49x1536xf32, 1536xf32) + add_47 = paddle._C_ops.add(matmul_38, parameter_219) + del matmul_38, parameter_219 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_46 = [slice_49, full_4, full_5, full_26, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_44 = paddle._C_ops.stack(combine_46, 0) + del combine_46 + + # pd_op.reshape: (-1x49x3x16x32xf32) <- (-1x49x1536xf32, 5xi64) + reshape_98 = paddle._C_ops.reshape(add_47, stack_44) + del add_47, stack_44 + + # pd_op.transpose: (3x-1x16x49x32xf32) <- (-1x49x3x16x32xf32) + transpose_41 = paddle._C_ops.transpose(reshape_98, [2, 0, 3, 1, 4]) + del reshape_98 + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_50 = paddle._C_ops.slice( + transpose_41, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_51 = paddle._C_ops.slice( + transpose_41, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_52 = paddle._C_ops.slice( + transpose_41, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_41 + + # pd_op.scale: (-1x16x49x32xf32) <- (-1x16x49x32xf32, 1xf32) + scale_6 = paddle._C_ops.scale(slice_50, full_8, float("0"), True) + del slice_50 + + # pd_op.transpose: (-1x16x32x49xf32) <- (-1x16x49x32xf32) + transpose_42 = paddle._C_ops.transpose(slice_51, [0, 1, 3, 2]) + del slice_51 + + # pd_op.matmul: (-1x16x49x49xf32) <- (-1x16x49x32xf32, -1x16x32x49xf32) + matmul_39 = paddle._C_ops.matmul(scale_6, transpose_42, False, False) + del scale_6, transpose_42 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_99 = paddle._C_ops.reshape(data_13, full_int_array_7) + del data_13 + + # pd_op.index_select: (2401x16xf32) <- (169x16xf32, 2401xi64) + index_select_6 = paddle._C_ops.index_select(data_14, reshape_99, 0) + del data_14, reshape_99 + + # pd_op.reshape: (49x49x16xf32) <- (2401x16xf32, 3xi64) + reshape_100 = paddle._C_ops.reshape(index_select_6, full_int_array_8) + del index_select_6 + + # pd_op.transpose: (16x49x49xf32) <- (49x49x16xf32) + transpose_43 = paddle._C_ops.transpose(reshape_100, [2, 0, 1]) + del reshape_100 + + # pd_op.unsqueeze: (1x16x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_18 = paddle._C_ops.unsqueeze(transpose_43, full_int_array_0) + del transpose_43 + + # pd_op.add: (-1x16x49x49xf32) <- (-1x16x49x49xf32, 1x16x49x49xf32) + add_48 = paddle._C_ops.add(matmul_39, unsqueeze_18) + del matmul_39, unsqueeze_18 + + # pd_op.softmax: (-1x16x49x49xf32) <- (-1x16x49x49xf32) + softmax_6 = paddle._C_ops.softmax(add_48, -1) + del add_48 + + # pd_op.matmul: (-1x16x49x32xf32) <- (-1x16x49x49xf32, -1x16x49x32xf32) + matmul_40 = paddle._C_ops.matmul(softmax_6, slice_52, False, False) + del slice_52, softmax_6 + + # pd_op.transpose: (-1x49x16x32xf32) <- (-1x16x49x32xf32) + transpose_44 = paddle._C_ops.transpose(matmul_40, [0, 2, 1, 3]) + del matmul_40 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_47 = [slice_49, full_4, full_18] + del slice_49 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_45 = paddle._C_ops.stack(combine_47, 0) + del combine_47 + + # pd_op.reshape: (-1x49x512xf32) <- (-1x49x16x32xf32, 3xi64) + reshape_101 = paddle._C_ops.reshape(transpose_44, stack_45) + del stack_45, transpose_44 + + # pd_op.matmul: (-1x49x512xf32) <- (-1x49x512xf32, 512x512xf32) + matmul_41 = paddle._C_ops.matmul(reshape_101, parameter_218, False, False) + del parameter_218, reshape_101 + + # pd_op.add: (-1x49x512xf32) <- (-1x49x512xf32, 512xf32) + add_49 = paddle._C_ops.add(matmul_41, parameter_217) + del matmul_41, parameter_217 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x49x512xf32, 4xi64) + reshape_102 = paddle._C_ops.reshape(add_49, full_int_array_38) + del add_49 + + # pd_op.reshape: (-1x2x2x7x7x512xf32) <- (-1x7x7x512xf32, 6xi64) + reshape_103 = paddle._C_ops.reshape(reshape_102, full_int_array_40) + del reshape_102 + + # pd_op.transpose: (-1x2x7x2x7x512xf32) <- (-1x2x2x7x7x512xf32) + transpose_45 = paddle._C_ops.transpose(reshape_103, [0, 1, 3, 2, 4, 5]) + del reshape_103 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x2x7x2x7x512xf32, 4xi64) + reshape_104 = paddle._C_ops.reshape(transpose_45, full_int_array_41) + del transpose_45 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_48 = [slice_47, full_30, full_18] + del slice_47 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_46 = paddle._C_ops.stack(combine_48, 0) + del combine_48 + + # pd_op.reshape: (-1x196x512xf32) <- (-1x14x14x512xf32, 3xi64) + reshape_105 = paddle._C_ops.reshape(reshape_104, stack_46) + del reshape_104, stack_46 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_50 = paddle._C_ops.add(add_46, reshape_105) + del add_46, reshape_105 + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_48, layer_norm_49, layer_norm_50 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_50, parameter_216, parameter_215, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_215, parameter_216 + + # pd_op.matmul: (-1x196x2048xf32) <- (-1x196x512xf32, 512x2048xf32) + matmul_42 = paddle._C_ops.matmul(layer_norm_48, parameter_214, False, False) + del layer_norm_48, parameter_214 + + # pd_op.add: (-1x196x2048xf32) <- (-1x196x2048xf32, 2048xf32) + add_51 = paddle._C_ops.add(matmul_42, parameter_213) + del matmul_42, parameter_213 + + # pd_op.gelu: (-1x196x2048xf32) <- (-1x196x2048xf32) + gelu_6 = paddle._C_ops.gelu(add_51, False) + del add_51 + + # pd_op.matmul: (-1x196x512xf32) <- (-1x196x2048xf32, 2048x512xf32) + matmul_43 = paddle._C_ops.matmul(gelu_6, parameter_212, False, False) + del gelu_6, parameter_212 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, 512xf32) + add_52 = paddle._C_ops.add(matmul_43, parameter_211) + del matmul_43, parameter_211 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_53 = paddle._C_ops.add(add_50, add_52) + del add_50, add_52 + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_32 = paddle._C_ops.shape64(add_53) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_53 = paddle._C_ops.slice( + shape64_32, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_32 + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_51, layer_norm_52, layer_norm_53 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_53, parameter_210, parameter_209, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_209, parameter_210 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_49 = [slice_53, full_28, full_28, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_47 = paddle._C_ops.stack(combine_49, 0) + del combine_49 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x196x512xf32, 4xi64) + reshape_106 = paddle._C_ops.reshape(layer_norm_51, stack_47) + del layer_norm_51, stack_47 + + # pd_op.shape64: (4xi64) <- (-1x14x14x512xf32) + shape64_33 = paddle._C_ops.shape64(reshape_106) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_54 = paddle._C_ops.slice( + shape64_33, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_33 + + # pd_op.roll: (-1x14x14x512xf32) <- (-1x14x14x512xf32, 2xi64) + roll_6 = paddle._C_ops.roll(reshape_106, full_int_array_11, [1, 2]) + del reshape_106 + + # pd_op.shape64: (4xi64) <- (-1x14x14x512xf32) + shape64_34 = paddle._C_ops.shape64(roll_6) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_55 = paddle._C_ops.slice( + shape64_34, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_34 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_50 = [slice_55, full_29, full_3, full_29, full_3, full_18] + del slice_55 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_48 = paddle._C_ops.stack(combine_50, 0) + del combine_50 + + # pd_op.reshape: (-1x2x7x2x7x512xf32) <- (-1x14x14x512xf32, 6xi64) + reshape_107 = paddle._C_ops.reshape(roll_6, stack_48) + del roll_6, stack_48 + + # pd_op.transpose: (-1x2x2x7x7x512xf32) <- (-1x2x7x2x7x512xf32) + transpose_46 = paddle._C_ops.transpose(reshape_107, [0, 1, 3, 2, 4, 5]) + del reshape_107 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x2x2x7x7x512xf32, 4xi64) + reshape_108 = paddle._C_ops.reshape(transpose_46, full_int_array_38) + del transpose_46 + + # pd_op.reshape: (-1x49x512xf32) <- (-1x7x7x512xf32, 3xi64) + reshape_109 = paddle._C_ops.reshape(reshape_108, full_int_array_39) + del reshape_108 + + # pd_op.full: (1x14x14x1xf32) <- () + full_35 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__36 = paddle._C_ops.set_value_( + full_35, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_35 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__37 = paddle._C_ops.set_value_( + set_value__36, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__36 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__38 = paddle._C_ops.set_value_( + set_value__37, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__37 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__39 = paddle._C_ops.set_value_( + set_value__38, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__38 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__40 = paddle._C_ops.set_value_( + set_value__39, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__39 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__41 = paddle._C_ops.set_value_( + set_value__40, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__40 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__42 = paddle._C_ops.set_value_( + set_value__41, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__41 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__43 = paddle._C_ops.set_value_( + set_value__42, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__42 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__3 = paddle._C_ops.set_value_( + set_value__43, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__43 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_110 = paddle._C_ops.reshape(set_value__3, full_int_array_42) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_47 = paddle._C_ops.transpose(reshape_110, [0, 1, 3, 2, 4, 5]) + del reshape_110 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_111 = paddle._C_ops.reshape(transpose_47, full_int_array_27) + del transpose_47 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_112 = paddle._C_ops.reshape(reshape_111, full_int_array_28) + del reshape_111 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_19 = paddle._C_ops.unsqueeze(reshape_112, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_20 = paddle._C_ops.unsqueeze(reshape_112, full_int_array_5) + del reshape_112 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_3 = paddle._C_ops.subtract(unsqueeze_19, unsqueeze_20) + del unsqueeze_19, unsqueeze_20 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_3 = paddle._C_ops.not_equal(subtract_3, full_11) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_6 = paddle._C_ops.where(not_equal_3, full_32, subtract_3) + del not_equal_3, subtract_3 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_3 = paddle._C_ops.equal(where_6, full_11) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_7 = paddle._C_ops.where(equal_3, full_33, where_6) + del equal_3, where_6 + + # pd_op.shape64: (3xi64) <- (-1x49x512xf32) + shape64_35 = paddle._C_ops.shape64(reshape_109) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_56 = paddle._C_ops.slice( + shape64_35, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_35 + + # pd_op.matmul: (-1x49x1536xf32) <- (-1x49x512xf32, 512x1536xf32) + matmul_44 = paddle._C_ops.matmul(reshape_109, parameter_208, False, False) + del parameter_208, reshape_109 + + # pd_op.add: (-1x49x1536xf32) <- (-1x49x1536xf32, 1536xf32) + add_54 = paddle._C_ops.add(matmul_44, parameter_207) + del matmul_44, parameter_207 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_51 = [slice_56, full_4, full_5, full_26, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_49 = paddle._C_ops.stack(combine_51, 0) + del combine_51 + + # pd_op.reshape: (-1x49x3x16x32xf32) <- (-1x49x1536xf32, 5xi64) + reshape_113 = paddle._C_ops.reshape(add_54, stack_49) + del add_54, stack_49 + + # pd_op.transpose: (3x-1x16x49x32xf32) <- (-1x49x3x16x32xf32) + transpose_48 = paddle._C_ops.transpose(reshape_113, [2, 0, 3, 1, 4]) + del reshape_113 + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_57 = paddle._C_ops.slice( + transpose_48, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_58 = paddle._C_ops.slice( + transpose_48, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_59 = paddle._C_ops.slice( + transpose_48, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_48 + + # pd_op.scale: (-1x16x49x32xf32) <- (-1x16x49x32xf32, 1xf32) + scale_7 = paddle._C_ops.scale(slice_57, full_8, float("0"), True) + del slice_57 + + # pd_op.transpose: (-1x16x32x49xf32) <- (-1x16x49x32xf32) + transpose_49 = paddle._C_ops.transpose(slice_58, [0, 1, 3, 2]) + del slice_58 + + # pd_op.matmul: (-1x16x49x49xf32) <- (-1x16x49x32xf32, -1x16x32x49xf32) + matmul_45 = paddle._C_ops.matmul(scale_7, transpose_49, False, False) + del scale_7, transpose_49 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_114 = paddle._C_ops.reshape(data_15, full_int_array_7) + del data_15 + + # pd_op.index_select: (2401x16xf32) <- (169x16xf32, 2401xi64) + index_select_7 = paddle._C_ops.index_select(data_16, reshape_114, 0) + del data_16, reshape_114 + + # pd_op.reshape: (49x49x16xf32) <- (2401x16xf32, 3xi64) + reshape_115 = paddle._C_ops.reshape(index_select_7, full_int_array_8) + del index_select_7 + + # pd_op.transpose: (16x49x49xf32) <- (49x49x16xf32) + transpose_50 = paddle._C_ops.transpose(reshape_115, [2, 0, 1]) + del reshape_115 + + # pd_op.unsqueeze: (1x16x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_21 = paddle._C_ops.unsqueeze(transpose_50, full_int_array_0) + del transpose_50 + + # pd_op.add: (-1x16x49x49xf32) <- (-1x16x49x49xf32, 1x16x49x49xf32) + add_55 = paddle._C_ops.add(matmul_45, unsqueeze_21) + del matmul_45, unsqueeze_21 + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_3 = paddle._C_ops.floor_divide(slice_56, full_34) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_52 = [floor_divide_3, full_6, full_26, full_4, full_4] + del floor_divide_3 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_50 = paddle._C_ops.stack(combine_52, 0) + del combine_52 + + # pd_op.reshape: (-1x4x16x49x49xf32) <- (-1x16x49x49xf32, 5xi64) + reshape_116 = paddle._C_ops.reshape(add_55, stack_50) + del add_55, stack_50 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_22 = paddle._C_ops.unsqueeze(where_7, full_int_array_1) + del where_7 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_23 = paddle._C_ops.unsqueeze(unsqueeze_22, full_int_array_0) + del unsqueeze_22 + + # pd_op.add: (-1x4x16x49x49xf32) <- (-1x4x16x49x49xf32, 1x4x1x49x49xf32) + add_56 = paddle._C_ops.add(reshape_116, unsqueeze_23) + del reshape_116, unsqueeze_23 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_53 = [slice_56, full_26, full_4, full_4] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_51 = paddle._C_ops.stack(combine_53, 0) + del combine_53 + + # pd_op.reshape: (-1x16x49x49xf32) <- (-1x4x16x49x49xf32, 4xi64) + reshape_117 = paddle._C_ops.reshape(add_56, stack_51) + del add_56, stack_51 + + # pd_op.softmax: (-1x16x49x49xf32) <- (-1x16x49x49xf32) + softmax_7 = paddle._C_ops.softmax(reshape_117, -1) + del reshape_117 + + # pd_op.matmul: (-1x16x49x32xf32) <- (-1x16x49x49xf32, -1x16x49x32xf32) + matmul_46 = paddle._C_ops.matmul(softmax_7, slice_59, False, False) + del slice_59, softmax_7 + + # pd_op.transpose: (-1x49x16x32xf32) <- (-1x16x49x32xf32) + transpose_51 = paddle._C_ops.transpose(matmul_46, [0, 2, 1, 3]) + del matmul_46 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_54 = [slice_56, full_4, full_18] + del slice_56 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_52 = paddle._C_ops.stack(combine_54, 0) + del combine_54 + + # pd_op.reshape: (-1x49x512xf32) <- (-1x49x16x32xf32, 3xi64) + reshape_118 = paddle._C_ops.reshape(transpose_51, stack_52) + del stack_52, transpose_51 + + # pd_op.matmul: (-1x49x512xf32) <- (-1x49x512xf32, 512x512xf32) + matmul_47 = paddle._C_ops.matmul(reshape_118, parameter_206, False, False) + del parameter_206, reshape_118 + + # pd_op.add: (-1x49x512xf32) <- (-1x49x512xf32, 512xf32) + add_57 = paddle._C_ops.add(matmul_47, parameter_205) + del matmul_47, parameter_205 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x49x512xf32, 4xi64) + reshape_119 = paddle._C_ops.reshape(add_57, full_int_array_38) + del add_57 + + # pd_op.reshape: (-1x2x2x7x7x512xf32) <- (-1x7x7x512xf32, 6xi64) + reshape_120 = paddle._C_ops.reshape(reshape_119, full_int_array_40) + del reshape_119 + + # pd_op.transpose: (-1x2x7x2x7x512xf32) <- (-1x2x2x7x7x512xf32) + transpose_52 = paddle._C_ops.transpose(reshape_120, [0, 1, 3, 2, 4, 5]) + del reshape_120 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x2x7x2x7x512xf32, 4xi64) + reshape_121 = paddle._C_ops.reshape(transpose_52, full_int_array_41) + del transpose_52 + + # pd_op.roll: (-1x14x14x512xf32) <- (-1x14x14x512xf32, 2xi64) + roll_7 = paddle._C_ops.roll(reshape_121, full_int_array_29, [1, 2]) + del reshape_121 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_55 = [slice_53, full_30, full_18] + del slice_53 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_53 = paddle._C_ops.stack(combine_55, 0) + del combine_55 + + # pd_op.reshape: (-1x196x512xf32) <- (-1x14x14x512xf32, 3xi64) + reshape_122 = paddle._C_ops.reshape(roll_7, stack_53) + del roll_7, stack_53 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_58 = paddle._C_ops.add(add_53, reshape_122) + del add_53, reshape_122 + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_54, layer_norm_55, layer_norm_56 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_58, parameter_204, parameter_203, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_203, parameter_204 + + # pd_op.matmul: (-1x196x2048xf32) <- (-1x196x512xf32, 512x2048xf32) + matmul_48 = paddle._C_ops.matmul(layer_norm_54, parameter_202, False, False) + del layer_norm_54, parameter_202 + + # pd_op.add: (-1x196x2048xf32) <- (-1x196x2048xf32, 2048xf32) + add_59 = paddle._C_ops.add(matmul_48, parameter_201) + del matmul_48, parameter_201 + + # pd_op.gelu: (-1x196x2048xf32) <- (-1x196x2048xf32) + gelu_7 = paddle._C_ops.gelu(add_59, False) + del add_59 + + # pd_op.matmul: (-1x196x512xf32) <- (-1x196x2048xf32, 2048x512xf32) + matmul_49 = paddle._C_ops.matmul(gelu_7, parameter_200, False, False) + del gelu_7, parameter_200 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, 512xf32) + add_60 = paddle._C_ops.add(matmul_49, parameter_199) + del matmul_49, parameter_199 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_61 = paddle._C_ops.add(add_58, add_60) + del add_58, add_60 + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_36 = paddle._C_ops.shape64(add_61) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_60 = paddle._C_ops.slice( + shape64_36, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_36 + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_57, layer_norm_58, layer_norm_59 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_61, parameter_198, parameter_197, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_197, parameter_198 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_56 = [slice_60, full_28, full_28, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_54 = paddle._C_ops.stack(combine_56, 0) + del combine_56 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x196x512xf32, 4xi64) + reshape_123 = paddle._C_ops.reshape(layer_norm_57, stack_54) + del layer_norm_57, stack_54 + + # pd_op.shape64: (4xi64) <- (-1x14x14x512xf32) + shape64_37 = paddle._C_ops.shape64(reshape_123) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_61 = paddle._C_ops.slice( + shape64_37, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_37 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_57 = [slice_61, full_29, full_3, full_29, full_3, full_18] + del slice_61 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_55 = paddle._C_ops.stack(combine_57, 0) + del combine_57 + + # pd_op.reshape: (-1x2x7x2x7x512xf32) <- (-1x14x14x512xf32, 6xi64) + reshape_124 = paddle._C_ops.reshape(reshape_123, stack_55) + del reshape_123, stack_55 + + # pd_op.transpose: (-1x2x2x7x7x512xf32) <- (-1x2x7x2x7x512xf32) + transpose_53 = paddle._C_ops.transpose(reshape_124, [0, 1, 3, 2, 4, 5]) + del reshape_124 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x2x2x7x7x512xf32, 4xi64) + reshape_125 = paddle._C_ops.reshape(transpose_53, full_int_array_38) + del transpose_53 + + # pd_op.reshape: (-1x49x512xf32) <- (-1x7x7x512xf32, 3xi64) + reshape_126 = paddle._C_ops.reshape(reshape_125, full_int_array_39) + del reshape_125 + + # pd_op.shape64: (3xi64) <- (-1x49x512xf32) + shape64_38 = paddle._C_ops.shape64(reshape_126) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_62 = paddle._C_ops.slice( + shape64_38, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_38 + + # pd_op.matmul: (-1x49x1536xf32) <- (-1x49x512xf32, 512x1536xf32) + matmul_50 = paddle._C_ops.matmul(reshape_126, parameter_196, False, False) + del parameter_196, reshape_126 + + # pd_op.add: (-1x49x1536xf32) <- (-1x49x1536xf32, 1536xf32) + add_62 = paddle._C_ops.add(matmul_50, parameter_195) + del matmul_50, parameter_195 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_58 = [slice_62, full_4, full_5, full_26, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_56 = paddle._C_ops.stack(combine_58, 0) + del combine_58 + + # pd_op.reshape: (-1x49x3x16x32xf32) <- (-1x49x1536xf32, 5xi64) + reshape_127 = paddle._C_ops.reshape(add_62, stack_56) + del add_62, stack_56 + + # pd_op.transpose: (3x-1x16x49x32xf32) <- (-1x49x3x16x32xf32) + transpose_54 = paddle._C_ops.transpose(reshape_127, [2, 0, 3, 1, 4]) + del reshape_127 + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_63 = paddle._C_ops.slice( + transpose_54, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_64 = paddle._C_ops.slice( + transpose_54, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_65 = paddle._C_ops.slice( + transpose_54, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_54 + + # pd_op.scale: (-1x16x49x32xf32) <- (-1x16x49x32xf32, 1xf32) + scale_8 = paddle._C_ops.scale(slice_63, full_8, float("0"), True) + del slice_63 + + # pd_op.transpose: (-1x16x32x49xf32) <- (-1x16x49x32xf32) + transpose_55 = paddle._C_ops.transpose(slice_64, [0, 1, 3, 2]) + del slice_64 + + # pd_op.matmul: (-1x16x49x49xf32) <- (-1x16x49x32xf32, -1x16x32x49xf32) + matmul_51 = paddle._C_ops.matmul(scale_8, transpose_55, False, False) + del scale_8, transpose_55 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_128 = paddle._C_ops.reshape(data_17, full_int_array_7) + del data_17 + + # pd_op.index_select: (2401x16xf32) <- (169x16xf32, 2401xi64) + index_select_8 = paddle._C_ops.index_select(data_18, reshape_128, 0) + del data_18, reshape_128 + + # pd_op.reshape: (49x49x16xf32) <- (2401x16xf32, 3xi64) + reshape_129 = paddle._C_ops.reshape(index_select_8, full_int_array_8) + del index_select_8 + + # pd_op.transpose: (16x49x49xf32) <- (49x49x16xf32) + transpose_56 = paddle._C_ops.transpose(reshape_129, [2, 0, 1]) + del reshape_129 + + # pd_op.unsqueeze: (1x16x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_24 = paddle._C_ops.unsqueeze(transpose_56, full_int_array_0) + del transpose_56 + + # pd_op.add: (-1x16x49x49xf32) <- (-1x16x49x49xf32, 1x16x49x49xf32) + add_63 = paddle._C_ops.add(matmul_51, unsqueeze_24) + del matmul_51, unsqueeze_24 + + # pd_op.softmax: (-1x16x49x49xf32) <- (-1x16x49x49xf32) + softmax_8 = paddle._C_ops.softmax(add_63, -1) + del add_63 + + # pd_op.matmul: (-1x16x49x32xf32) <- (-1x16x49x49xf32, -1x16x49x32xf32) + matmul_52 = paddle._C_ops.matmul(softmax_8, slice_65, False, False) + del slice_65, softmax_8 + + # pd_op.transpose: (-1x49x16x32xf32) <- (-1x16x49x32xf32) + transpose_57 = paddle._C_ops.transpose(matmul_52, [0, 2, 1, 3]) + del matmul_52 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_59 = [slice_62, full_4, full_18] + del slice_62 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_57 = paddle._C_ops.stack(combine_59, 0) + del combine_59 + + # pd_op.reshape: (-1x49x512xf32) <- (-1x49x16x32xf32, 3xi64) + reshape_130 = paddle._C_ops.reshape(transpose_57, stack_57) + del stack_57, transpose_57 + + # pd_op.matmul: (-1x49x512xf32) <- (-1x49x512xf32, 512x512xf32) + matmul_53 = paddle._C_ops.matmul(reshape_130, parameter_194, False, False) + del parameter_194, reshape_130 + + # pd_op.add: (-1x49x512xf32) <- (-1x49x512xf32, 512xf32) + add_64 = paddle._C_ops.add(matmul_53, parameter_193) + del matmul_53, parameter_193 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x49x512xf32, 4xi64) + reshape_131 = paddle._C_ops.reshape(add_64, full_int_array_38) + del add_64 + + # pd_op.reshape: (-1x2x2x7x7x512xf32) <- (-1x7x7x512xf32, 6xi64) + reshape_132 = paddle._C_ops.reshape(reshape_131, full_int_array_40) + del reshape_131 + + # pd_op.transpose: (-1x2x7x2x7x512xf32) <- (-1x2x2x7x7x512xf32) + transpose_58 = paddle._C_ops.transpose(reshape_132, [0, 1, 3, 2, 4, 5]) + del reshape_132 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x2x7x2x7x512xf32, 4xi64) + reshape_133 = paddle._C_ops.reshape(transpose_58, full_int_array_41) + del transpose_58 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_60 = [slice_60, full_30, full_18] + del slice_60 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_58 = paddle._C_ops.stack(combine_60, 0) + del combine_60 + + # pd_op.reshape: (-1x196x512xf32) <- (-1x14x14x512xf32, 3xi64) + reshape_134 = paddle._C_ops.reshape(reshape_133, stack_58) + del reshape_133, stack_58 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_65 = paddle._C_ops.add(add_61, reshape_134) + del add_61, reshape_134 + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_60, layer_norm_61, layer_norm_62 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_65, parameter_192, parameter_191, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_191, parameter_192 + + # pd_op.matmul: (-1x196x2048xf32) <- (-1x196x512xf32, 512x2048xf32) + matmul_54 = paddle._C_ops.matmul(layer_norm_60, parameter_190, False, False) + del layer_norm_60, parameter_190 + + # pd_op.add: (-1x196x2048xf32) <- (-1x196x2048xf32, 2048xf32) + add_66 = paddle._C_ops.add(matmul_54, parameter_189) + del matmul_54, parameter_189 + + # pd_op.gelu: (-1x196x2048xf32) <- (-1x196x2048xf32) + gelu_8 = paddle._C_ops.gelu(add_66, False) + del add_66 + + # pd_op.matmul: (-1x196x512xf32) <- (-1x196x2048xf32, 2048x512xf32) + matmul_55 = paddle._C_ops.matmul(gelu_8, parameter_188, False, False) + del gelu_8, parameter_188 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, 512xf32) + add_67 = paddle._C_ops.add(matmul_55, parameter_187) + del matmul_55, parameter_187 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_68 = paddle._C_ops.add(add_65, add_67) + del add_65, add_67 + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_39 = paddle._C_ops.shape64(add_68) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_66 = paddle._C_ops.slice( + shape64_39, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_39 + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_63, layer_norm_64, layer_norm_65 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_68, parameter_186, parameter_185, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_185, parameter_186 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_61 = [slice_66, full_28, full_28, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_59 = paddle._C_ops.stack(combine_61, 0) + del combine_61 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x196x512xf32, 4xi64) + reshape_135 = paddle._C_ops.reshape(layer_norm_63, stack_59) + del layer_norm_63, stack_59 + + # pd_op.shape64: (4xi64) <- (-1x14x14x512xf32) + shape64_40 = paddle._C_ops.shape64(reshape_135) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_67 = paddle._C_ops.slice( + shape64_40, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_40 + + # pd_op.roll: (-1x14x14x512xf32) <- (-1x14x14x512xf32, 2xi64) + roll_8 = paddle._C_ops.roll(reshape_135, full_int_array_11, [1, 2]) + del reshape_135 + + # pd_op.shape64: (4xi64) <- (-1x14x14x512xf32) + shape64_41 = paddle._C_ops.shape64(roll_8) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_68 = paddle._C_ops.slice( + shape64_41, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_41 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_62 = [slice_68, full_29, full_3, full_29, full_3, full_18] + del slice_68 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_60 = paddle._C_ops.stack(combine_62, 0) + del combine_62 + + # pd_op.reshape: (-1x2x7x2x7x512xf32) <- (-1x14x14x512xf32, 6xi64) + reshape_136 = paddle._C_ops.reshape(roll_8, stack_60) + del roll_8, stack_60 + + # pd_op.transpose: (-1x2x2x7x7x512xf32) <- (-1x2x7x2x7x512xf32) + transpose_59 = paddle._C_ops.transpose(reshape_136, [0, 1, 3, 2, 4, 5]) + del reshape_136 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x2x2x7x7x512xf32, 4xi64) + reshape_137 = paddle._C_ops.reshape(transpose_59, full_int_array_38) + del transpose_59 + + # pd_op.reshape: (-1x49x512xf32) <- (-1x7x7x512xf32, 3xi64) + reshape_138 = paddle._C_ops.reshape(reshape_137, full_int_array_39) + del reshape_137 + + # pd_op.full: (1x14x14x1xf32) <- () + full_36 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__44 = paddle._C_ops.set_value_( + full_36, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_36 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__45 = paddle._C_ops.set_value_( + set_value__44, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__44 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__46 = paddle._C_ops.set_value_( + set_value__45, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__45 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__47 = paddle._C_ops.set_value_( + set_value__46, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__46 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__48 = paddle._C_ops.set_value_( + set_value__47, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__47 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__49 = paddle._C_ops.set_value_( + set_value__48, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__48 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__50 = paddle._C_ops.set_value_( + set_value__49, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__49 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__51 = paddle._C_ops.set_value_( + set_value__50, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__50 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__4 = paddle._C_ops.set_value_( + set_value__51, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__51 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_139 = paddle._C_ops.reshape(set_value__4, full_int_array_42) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_60 = paddle._C_ops.transpose(reshape_139, [0, 1, 3, 2, 4, 5]) + del reshape_139 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_140 = paddle._C_ops.reshape(transpose_60, full_int_array_27) + del transpose_60 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_141 = paddle._C_ops.reshape(reshape_140, full_int_array_28) + del reshape_140 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_25 = paddle._C_ops.unsqueeze(reshape_141, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_26 = paddle._C_ops.unsqueeze(reshape_141, full_int_array_5) + del reshape_141 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_4 = paddle._C_ops.subtract(unsqueeze_25, unsqueeze_26) + del unsqueeze_25, unsqueeze_26 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_4 = paddle._C_ops.not_equal(subtract_4, full_11) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_8 = paddle._C_ops.where(not_equal_4, full_32, subtract_4) + del not_equal_4, subtract_4 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_4 = paddle._C_ops.equal(where_8, full_11) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_9 = paddle._C_ops.where(equal_4, full_33, where_8) + del equal_4, where_8 + + # pd_op.shape64: (3xi64) <- (-1x49x512xf32) + shape64_42 = paddle._C_ops.shape64(reshape_138) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_69 = paddle._C_ops.slice( + shape64_42, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_42 + + # pd_op.matmul: (-1x49x1536xf32) <- (-1x49x512xf32, 512x1536xf32) + matmul_56 = paddle._C_ops.matmul(reshape_138, parameter_184, False, False) + del parameter_184, reshape_138 + + # pd_op.add: (-1x49x1536xf32) <- (-1x49x1536xf32, 1536xf32) + add_69 = paddle._C_ops.add(matmul_56, parameter_183) + del matmul_56, parameter_183 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_63 = [slice_69, full_4, full_5, full_26, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_61 = paddle._C_ops.stack(combine_63, 0) + del combine_63 + + # pd_op.reshape: (-1x49x3x16x32xf32) <- (-1x49x1536xf32, 5xi64) + reshape_142 = paddle._C_ops.reshape(add_69, stack_61) + del add_69, stack_61 + + # pd_op.transpose: (3x-1x16x49x32xf32) <- (-1x49x3x16x32xf32) + transpose_61 = paddle._C_ops.transpose(reshape_142, [2, 0, 3, 1, 4]) + del reshape_142 + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_70 = paddle._C_ops.slice( + transpose_61, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_71 = paddle._C_ops.slice( + transpose_61, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_72 = paddle._C_ops.slice( + transpose_61, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_61 + + # pd_op.scale: (-1x16x49x32xf32) <- (-1x16x49x32xf32, 1xf32) + scale_9 = paddle._C_ops.scale(slice_70, full_8, float("0"), True) + del slice_70 + + # pd_op.transpose: (-1x16x32x49xf32) <- (-1x16x49x32xf32) + transpose_62 = paddle._C_ops.transpose(slice_71, [0, 1, 3, 2]) + del slice_71 + + # pd_op.matmul: (-1x16x49x49xf32) <- (-1x16x49x32xf32, -1x16x32x49xf32) + matmul_57 = paddle._C_ops.matmul(scale_9, transpose_62, False, False) + del scale_9, transpose_62 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_143 = paddle._C_ops.reshape(data_19, full_int_array_7) + del data_19 + + # pd_op.index_select: (2401x16xf32) <- (169x16xf32, 2401xi64) + index_select_9 = paddle._C_ops.index_select(data_20, reshape_143, 0) + del data_20, reshape_143 + + # pd_op.reshape: (49x49x16xf32) <- (2401x16xf32, 3xi64) + reshape_144 = paddle._C_ops.reshape(index_select_9, full_int_array_8) + del index_select_9 + + # pd_op.transpose: (16x49x49xf32) <- (49x49x16xf32) + transpose_63 = paddle._C_ops.transpose(reshape_144, [2, 0, 1]) + del reshape_144 + + # pd_op.unsqueeze: (1x16x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_27 = paddle._C_ops.unsqueeze(transpose_63, full_int_array_0) + del transpose_63 + + # pd_op.add: (-1x16x49x49xf32) <- (-1x16x49x49xf32, 1x16x49x49xf32) + add_70 = paddle._C_ops.add(matmul_57, unsqueeze_27) + del matmul_57, unsqueeze_27 + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_4 = paddle._C_ops.floor_divide(slice_69, full_34) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_64 = [floor_divide_4, full_6, full_26, full_4, full_4] + del floor_divide_4 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_62 = paddle._C_ops.stack(combine_64, 0) + del combine_64 + + # pd_op.reshape: (-1x4x16x49x49xf32) <- (-1x16x49x49xf32, 5xi64) + reshape_145 = paddle._C_ops.reshape(add_70, stack_62) + del add_70, stack_62 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_28 = paddle._C_ops.unsqueeze(where_9, full_int_array_1) + del where_9 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_29 = paddle._C_ops.unsqueeze(unsqueeze_28, full_int_array_0) + del unsqueeze_28 + + # pd_op.add: (-1x4x16x49x49xf32) <- (-1x4x16x49x49xf32, 1x4x1x49x49xf32) + add_71 = paddle._C_ops.add(reshape_145, unsqueeze_29) + del reshape_145, unsqueeze_29 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_65 = [slice_69, full_26, full_4, full_4] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_63 = paddle._C_ops.stack(combine_65, 0) + del combine_65 + + # pd_op.reshape: (-1x16x49x49xf32) <- (-1x4x16x49x49xf32, 4xi64) + reshape_146 = paddle._C_ops.reshape(add_71, stack_63) + del add_71, stack_63 + + # pd_op.softmax: (-1x16x49x49xf32) <- (-1x16x49x49xf32) + softmax_9 = paddle._C_ops.softmax(reshape_146, -1) + del reshape_146 + + # pd_op.matmul: (-1x16x49x32xf32) <- (-1x16x49x49xf32, -1x16x49x32xf32) + matmul_58 = paddle._C_ops.matmul(softmax_9, slice_72, False, False) + del slice_72, softmax_9 + + # pd_op.transpose: (-1x49x16x32xf32) <- (-1x16x49x32xf32) + transpose_64 = paddle._C_ops.transpose(matmul_58, [0, 2, 1, 3]) + del matmul_58 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_66 = [slice_69, full_4, full_18] + del slice_69 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_64 = paddle._C_ops.stack(combine_66, 0) + del combine_66 + + # pd_op.reshape: (-1x49x512xf32) <- (-1x49x16x32xf32, 3xi64) + reshape_147 = paddle._C_ops.reshape(transpose_64, stack_64) + del stack_64, transpose_64 + + # pd_op.matmul: (-1x49x512xf32) <- (-1x49x512xf32, 512x512xf32) + matmul_59 = paddle._C_ops.matmul(reshape_147, parameter_182, False, False) + del parameter_182, reshape_147 + + # pd_op.add: (-1x49x512xf32) <- (-1x49x512xf32, 512xf32) + add_72 = paddle._C_ops.add(matmul_59, parameter_181) + del matmul_59, parameter_181 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x49x512xf32, 4xi64) + reshape_148 = paddle._C_ops.reshape(add_72, full_int_array_38) + del add_72 + + # pd_op.reshape: (-1x2x2x7x7x512xf32) <- (-1x7x7x512xf32, 6xi64) + reshape_149 = paddle._C_ops.reshape(reshape_148, full_int_array_40) + del reshape_148 + + # pd_op.transpose: (-1x2x7x2x7x512xf32) <- (-1x2x2x7x7x512xf32) + transpose_65 = paddle._C_ops.transpose(reshape_149, [0, 1, 3, 2, 4, 5]) + del reshape_149 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x2x7x2x7x512xf32, 4xi64) + reshape_150 = paddle._C_ops.reshape(transpose_65, full_int_array_41) + del transpose_65 + + # pd_op.roll: (-1x14x14x512xf32) <- (-1x14x14x512xf32, 2xi64) + roll_9 = paddle._C_ops.roll(reshape_150, full_int_array_29, [1, 2]) + del reshape_150 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_67 = [slice_66, full_30, full_18] + del slice_66 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_65 = paddle._C_ops.stack(combine_67, 0) + del combine_67 + + # pd_op.reshape: (-1x196x512xf32) <- (-1x14x14x512xf32, 3xi64) + reshape_151 = paddle._C_ops.reshape(roll_9, stack_65) + del roll_9, stack_65 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_73 = paddle._C_ops.add(add_68, reshape_151) + del add_68, reshape_151 + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_66, layer_norm_67, layer_norm_68 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_73, parameter_180, parameter_179, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_179, parameter_180 + + # pd_op.matmul: (-1x196x2048xf32) <- (-1x196x512xf32, 512x2048xf32) + matmul_60 = paddle._C_ops.matmul(layer_norm_66, parameter_178, False, False) + del layer_norm_66, parameter_178 + + # pd_op.add: (-1x196x2048xf32) <- (-1x196x2048xf32, 2048xf32) + add_74 = paddle._C_ops.add(matmul_60, parameter_177) + del matmul_60, parameter_177 + + # pd_op.gelu: (-1x196x2048xf32) <- (-1x196x2048xf32) + gelu_9 = paddle._C_ops.gelu(add_74, False) + del add_74 + + # pd_op.matmul: (-1x196x512xf32) <- (-1x196x2048xf32, 2048x512xf32) + matmul_61 = paddle._C_ops.matmul(gelu_9, parameter_176, False, False) + del gelu_9, parameter_176 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, 512xf32) + add_75 = paddle._C_ops.add(matmul_61, parameter_175) + del matmul_61, parameter_175 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_76 = paddle._C_ops.add(add_73, add_75) + del add_73, add_75 + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_43 = paddle._C_ops.shape64(add_76) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_73 = paddle._C_ops.slice( + shape64_43, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_43 + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_69, layer_norm_70, layer_norm_71 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_76, parameter_174, parameter_173, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_173, parameter_174 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_68 = [slice_73, full_28, full_28, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_66 = paddle._C_ops.stack(combine_68, 0) + del combine_68 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x196x512xf32, 4xi64) + reshape_152 = paddle._C_ops.reshape(layer_norm_69, stack_66) + del layer_norm_69, stack_66 + + # pd_op.shape64: (4xi64) <- (-1x14x14x512xf32) + shape64_44 = paddle._C_ops.shape64(reshape_152) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_74 = paddle._C_ops.slice( + shape64_44, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_44 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_69 = [slice_74, full_29, full_3, full_29, full_3, full_18] + del slice_74 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_67 = paddle._C_ops.stack(combine_69, 0) + del combine_69 + + # pd_op.reshape: (-1x2x7x2x7x512xf32) <- (-1x14x14x512xf32, 6xi64) + reshape_153 = paddle._C_ops.reshape(reshape_152, stack_67) + del reshape_152, stack_67 + + # pd_op.transpose: (-1x2x2x7x7x512xf32) <- (-1x2x7x2x7x512xf32) + transpose_66 = paddle._C_ops.transpose(reshape_153, [0, 1, 3, 2, 4, 5]) + del reshape_153 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x2x2x7x7x512xf32, 4xi64) + reshape_154 = paddle._C_ops.reshape(transpose_66, full_int_array_38) + del transpose_66 + + # pd_op.reshape: (-1x49x512xf32) <- (-1x7x7x512xf32, 3xi64) + reshape_155 = paddle._C_ops.reshape(reshape_154, full_int_array_39) + del reshape_154 + + # pd_op.shape64: (3xi64) <- (-1x49x512xf32) + shape64_45 = paddle._C_ops.shape64(reshape_155) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_75 = paddle._C_ops.slice( + shape64_45, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_45 + + # pd_op.matmul: (-1x49x1536xf32) <- (-1x49x512xf32, 512x1536xf32) + matmul_62 = paddle._C_ops.matmul(reshape_155, parameter_172, False, False) + del parameter_172, reshape_155 + + # pd_op.add: (-1x49x1536xf32) <- (-1x49x1536xf32, 1536xf32) + add_77 = paddle._C_ops.add(matmul_62, parameter_171) + del matmul_62, parameter_171 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_70 = [slice_75, full_4, full_5, full_26, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_68 = paddle._C_ops.stack(combine_70, 0) + del combine_70 + + # pd_op.reshape: (-1x49x3x16x32xf32) <- (-1x49x1536xf32, 5xi64) + reshape_156 = paddle._C_ops.reshape(add_77, stack_68) + del add_77, stack_68 + + # pd_op.transpose: (3x-1x16x49x32xf32) <- (-1x49x3x16x32xf32) + transpose_67 = paddle._C_ops.transpose(reshape_156, [2, 0, 3, 1, 4]) + del reshape_156 + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_76 = paddle._C_ops.slice( + transpose_67, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_77 = paddle._C_ops.slice( + transpose_67, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_78 = paddle._C_ops.slice( + transpose_67, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_67 + + # pd_op.scale: (-1x16x49x32xf32) <- (-1x16x49x32xf32, 1xf32) + scale_10 = paddle._C_ops.scale(slice_76, full_8, float("0"), True) + del slice_76 + + # pd_op.transpose: (-1x16x32x49xf32) <- (-1x16x49x32xf32) + transpose_68 = paddle._C_ops.transpose(slice_77, [0, 1, 3, 2]) + del slice_77 + + # pd_op.matmul: (-1x16x49x49xf32) <- (-1x16x49x32xf32, -1x16x32x49xf32) + matmul_63 = paddle._C_ops.matmul(scale_10, transpose_68, False, False) + del scale_10, transpose_68 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_157 = paddle._C_ops.reshape(data_21, full_int_array_7) + del data_21 + + # pd_op.index_select: (2401x16xf32) <- (169x16xf32, 2401xi64) + index_select_10 = paddle._C_ops.index_select(data_22, reshape_157, 0) + del data_22, reshape_157 + + # pd_op.reshape: (49x49x16xf32) <- (2401x16xf32, 3xi64) + reshape_158 = paddle._C_ops.reshape(index_select_10, full_int_array_8) + del index_select_10 + + # pd_op.transpose: (16x49x49xf32) <- (49x49x16xf32) + transpose_69 = paddle._C_ops.transpose(reshape_158, [2, 0, 1]) + del reshape_158 + + # pd_op.unsqueeze: (1x16x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_30 = paddle._C_ops.unsqueeze(transpose_69, full_int_array_0) + del transpose_69 + + # pd_op.add: (-1x16x49x49xf32) <- (-1x16x49x49xf32, 1x16x49x49xf32) + add_78 = paddle._C_ops.add(matmul_63, unsqueeze_30) + del matmul_63, unsqueeze_30 + + # pd_op.softmax: (-1x16x49x49xf32) <- (-1x16x49x49xf32) + softmax_10 = paddle._C_ops.softmax(add_78, -1) + del add_78 + + # pd_op.matmul: (-1x16x49x32xf32) <- (-1x16x49x49xf32, -1x16x49x32xf32) + matmul_64 = paddle._C_ops.matmul(softmax_10, slice_78, False, False) + del slice_78, softmax_10 + + # pd_op.transpose: (-1x49x16x32xf32) <- (-1x16x49x32xf32) + transpose_70 = paddle._C_ops.transpose(matmul_64, [0, 2, 1, 3]) + del matmul_64 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_71 = [slice_75, full_4, full_18] + del slice_75 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_69 = paddle._C_ops.stack(combine_71, 0) + del combine_71 + + # pd_op.reshape: (-1x49x512xf32) <- (-1x49x16x32xf32, 3xi64) + reshape_159 = paddle._C_ops.reshape(transpose_70, stack_69) + del stack_69, transpose_70 + + # pd_op.matmul: (-1x49x512xf32) <- (-1x49x512xf32, 512x512xf32) + matmul_65 = paddle._C_ops.matmul(reshape_159, parameter_170, False, False) + del parameter_170, reshape_159 + + # pd_op.add: (-1x49x512xf32) <- (-1x49x512xf32, 512xf32) + add_79 = paddle._C_ops.add(matmul_65, parameter_169) + del matmul_65, parameter_169 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x49x512xf32, 4xi64) + reshape_160 = paddle._C_ops.reshape(add_79, full_int_array_38) + del add_79 + + # pd_op.reshape: (-1x2x2x7x7x512xf32) <- (-1x7x7x512xf32, 6xi64) + reshape_161 = paddle._C_ops.reshape(reshape_160, full_int_array_40) + del reshape_160 + + # pd_op.transpose: (-1x2x7x2x7x512xf32) <- (-1x2x2x7x7x512xf32) + transpose_71 = paddle._C_ops.transpose(reshape_161, [0, 1, 3, 2, 4, 5]) + del reshape_161 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x2x7x2x7x512xf32, 4xi64) + reshape_162 = paddle._C_ops.reshape(transpose_71, full_int_array_41) + del transpose_71 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_72 = [slice_73, full_30, full_18] + del slice_73 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_70 = paddle._C_ops.stack(combine_72, 0) + del combine_72 + + # pd_op.reshape: (-1x196x512xf32) <- (-1x14x14x512xf32, 3xi64) + reshape_163 = paddle._C_ops.reshape(reshape_162, stack_70) + del reshape_162, stack_70 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_80 = paddle._C_ops.add(add_76, reshape_163) + del add_76, reshape_163 + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_72, layer_norm_73, layer_norm_74 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_80, parameter_168, parameter_167, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_167, parameter_168 + + # pd_op.matmul: (-1x196x2048xf32) <- (-1x196x512xf32, 512x2048xf32) + matmul_66 = paddle._C_ops.matmul(layer_norm_72, parameter_166, False, False) + del layer_norm_72, parameter_166 + + # pd_op.add: (-1x196x2048xf32) <- (-1x196x2048xf32, 2048xf32) + add_81 = paddle._C_ops.add(matmul_66, parameter_165) + del matmul_66, parameter_165 + + # pd_op.gelu: (-1x196x2048xf32) <- (-1x196x2048xf32) + gelu_10 = paddle._C_ops.gelu(add_81, False) + del add_81 + + # pd_op.matmul: (-1x196x512xf32) <- (-1x196x2048xf32, 2048x512xf32) + matmul_67 = paddle._C_ops.matmul(gelu_10, parameter_164, False, False) + del gelu_10, parameter_164 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, 512xf32) + add_82 = paddle._C_ops.add(matmul_67, parameter_163) + del matmul_67, parameter_163 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_83 = paddle._C_ops.add(add_80, add_82) + del add_80, add_82 + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_46 = paddle._C_ops.shape64(add_83) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_79 = paddle._C_ops.slice( + shape64_46, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_46 + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_75, layer_norm_76, layer_norm_77 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_83, parameter_162, parameter_161, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_161, parameter_162 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_73 = [slice_79, full_28, full_28, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_71 = paddle._C_ops.stack(combine_73, 0) + del combine_73 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x196x512xf32, 4xi64) + reshape_164 = paddle._C_ops.reshape(layer_norm_75, stack_71) + del layer_norm_75, stack_71 + + # pd_op.shape64: (4xi64) <- (-1x14x14x512xf32) + shape64_47 = paddle._C_ops.shape64(reshape_164) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_80 = paddle._C_ops.slice( + shape64_47, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_47 + + # pd_op.roll: (-1x14x14x512xf32) <- (-1x14x14x512xf32, 2xi64) + roll_10 = paddle._C_ops.roll(reshape_164, full_int_array_11, [1, 2]) + del reshape_164 + + # pd_op.shape64: (4xi64) <- (-1x14x14x512xf32) + shape64_48 = paddle._C_ops.shape64(roll_10) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_81 = paddle._C_ops.slice( + shape64_48, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_48 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_74 = [slice_81, full_29, full_3, full_29, full_3, full_18] + del slice_81 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_72 = paddle._C_ops.stack(combine_74, 0) + del combine_74 + + # pd_op.reshape: (-1x2x7x2x7x512xf32) <- (-1x14x14x512xf32, 6xi64) + reshape_165 = paddle._C_ops.reshape(roll_10, stack_72) + del roll_10, stack_72 + + # pd_op.transpose: (-1x2x2x7x7x512xf32) <- (-1x2x7x2x7x512xf32) + transpose_72 = paddle._C_ops.transpose(reshape_165, [0, 1, 3, 2, 4, 5]) + del reshape_165 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x2x2x7x7x512xf32, 4xi64) + reshape_166 = paddle._C_ops.reshape(transpose_72, full_int_array_38) + del transpose_72 + + # pd_op.reshape: (-1x49x512xf32) <- (-1x7x7x512xf32, 3xi64) + reshape_167 = paddle._C_ops.reshape(reshape_166, full_int_array_39) + del reshape_166 + + # pd_op.full: (1x14x14x1xf32) <- () + full_37 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__52 = paddle._C_ops.set_value_( + full_37, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_37 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__53 = paddle._C_ops.set_value_( + set_value__52, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__52 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__54 = paddle._C_ops.set_value_( + set_value__53, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__53 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__55 = paddle._C_ops.set_value_( + set_value__54, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__54 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__56 = paddle._C_ops.set_value_( + set_value__55, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__55 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__57 = paddle._C_ops.set_value_( + set_value__56, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__56 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__58 = paddle._C_ops.set_value_( + set_value__57, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__57 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__59 = paddle._C_ops.set_value_( + set_value__58, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__58 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__5 = paddle._C_ops.set_value_( + set_value__59, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__59 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_168 = paddle._C_ops.reshape(set_value__5, full_int_array_42) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_73 = paddle._C_ops.transpose(reshape_168, [0, 1, 3, 2, 4, 5]) + del reshape_168 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_169 = paddle._C_ops.reshape(transpose_73, full_int_array_27) + del transpose_73 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_170 = paddle._C_ops.reshape(reshape_169, full_int_array_28) + del reshape_169 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_31 = paddle._C_ops.unsqueeze(reshape_170, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_32 = paddle._C_ops.unsqueeze(reshape_170, full_int_array_5) + del reshape_170 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_5 = paddle._C_ops.subtract(unsqueeze_31, unsqueeze_32) + del unsqueeze_31, unsqueeze_32 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_5 = paddle._C_ops.not_equal(subtract_5, full_11) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_10 = paddle._C_ops.where(not_equal_5, full_32, subtract_5) + del not_equal_5, subtract_5 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_5 = paddle._C_ops.equal(where_10, full_11) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_11 = paddle._C_ops.where(equal_5, full_33, where_10) + del equal_5, where_10 + + # pd_op.shape64: (3xi64) <- (-1x49x512xf32) + shape64_49 = paddle._C_ops.shape64(reshape_167) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_82 = paddle._C_ops.slice( + shape64_49, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_49 + + # pd_op.matmul: (-1x49x1536xf32) <- (-1x49x512xf32, 512x1536xf32) + matmul_68 = paddle._C_ops.matmul(reshape_167, parameter_160, False, False) + del parameter_160, reshape_167 + + # pd_op.add: (-1x49x1536xf32) <- (-1x49x1536xf32, 1536xf32) + add_84 = paddle._C_ops.add(matmul_68, parameter_159) + del matmul_68, parameter_159 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_75 = [slice_82, full_4, full_5, full_26, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_73 = paddle._C_ops.stack(combine_75, 0) + del combine_75 + + # pd_op.reshape: (-1x49x3x16x32xf32) <- (-1x49x1536xf32, 5xi64) + reshape_171 = paddle._C_ops.reshape(add_84, stack_73) + del add_84, stack_73 + + # pd_op.transpose: (3x-1x16x49x32xf32) <- (-1x49x3x16x32xf32) + transpose_74 = paddle._C_ops.transpose(reshape_171, [2, 0, 3, 1, 4]) + del reshape_171 + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_83 = paddle._C_ops.slice( + transpose_74, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_84 = paddle._C_ops.slice( + transpose_74, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_85 = paddle._C_ops.slice( + transpose_74, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_74 + + # pd_op.scale: (-1x16x49x32xf32) <- (-1x16x49x32xf32, 1xf32) + scale_11 = paddle._C_ops.scale(slice_83, full_8, float("0"), True) + del slice_83 + + # pd_op.transpose: (-1x16x32x49xf32) <- (-1x16x49x32xf32) + transpose_75 = paddle._C_ops.transpose(slice_84, [0, 1, 3, 2]) + del slice_84 + + # pd_op.matmul: (-1x16x49x49xf32) <- (-1x16x49x32xf32, -1x16x32x49xf32) + matmul_69 = paddle._C_ops.matmul(scale_11, transpose_75, False, False) + del scale_11, transpose_75 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_172 = paddle._C_ops.reshape(data_23, full_int_array_7) + del data_23 + + # pd_op.index_select: (2401x16xf32) <- (169x16xf32, 2401xi64) + index_select_11 = paddle._C_ops.index_select(data_24, reshape_172, 0) + del data_24, reshape_172 + + # pd_op.reshape: (49x49x16xf32) <- (2401x16xf32, 3xi64) + reshape_173 = paddle._C_ops.reshape(index_select_11, full_int_array_8) + del index_select_11 + + # pd_op.transpose: (16x49x49xf32) <- (49x49x16xf32) + transpose_76 = paddle._C_ops.transpose(reshape_173, [2, 0, 1]) + del reshape_173 + + # pd_op.unsqueeze: (1x16x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_33 = paddle._C_ops.unsqueeze(transpose_76, full_int_array_0) + del transpose_76 + + # pd_op.add: (-1x16x49x49xf32) <- (-1x16x49x49xf32, 1x16x49x49xf32) + add_85 = paddle._C_ops.add(matmul_69, unsqueeze_33) + del matmul_69, unsqueeze_33 + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_5 = paddle._C_ops.floor_divide(slice_82, full_34) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_76 = [floor_divide_5, full_6, full_26, full_4, full_4] + del floor_divide_5 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_74 = paddle._C_ops.stack(combine_76, 0) + del combine_76 + + # pd_op.reshape: (-1x4x16x49x49xf32) <- (-1x16x49x49xf32, 5xi64) + reshape_174 = paddle._C_ops.reshape(add_85, stack_74) + del add_85, stack_74 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_34 = paddle._C_ops.unsqueeze(where_11, full_int_array_1) + del where_11 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_35 = paddle._C_ops.unsqueeze(unsqueeze_34, full_int_array_0) + del unsqueeze_34 + + # pd_op.add: (-1x4x16x49x49xf32) <- (-1x4x16x49x49xf32, 1x4x1x49x49xf32) + add_86 = paddle._C_ops.add(reshape_174, unsqueeze_35) + del reshape_174, unsqueeze_35 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_77 = [slice_82, full_26, full_4, full_4] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_75 = paddle._C_ops.stack(combine_77, 0) + del combine_77 + + # pd_op.reshape: (-1x16x49x49xf32) <- (-1x4x16x49x49xf32, 4xi64) + reshape_175 = paddle._C_ops.reshape(add_86, stack_75) + del add_86, stack_75 + + # pd_op.softmax: (-1x16x49x49xf32) <- (-1x16x49x49xf32) + softmax_11 = paddle._C_ops.softmax(reshape_175, -1) + del reshape_175 + + # pd_op.matmul: (-1x16x49x32xf32) <- (-1x16x49x49xf32, -1x16x49x32xf32) + matmul_70 = paddle._C_ops.matmul(softmax_11, slice_85, False, False) + del slice_85, softmax_11 + + # pd_op.transpose: (-1x49x16x32xf32) <- (-1x16x49x32xf32) + transpose_77 = paddle._C_ops.transpose(matmul_70, [0, 2, 1, 3]) + del matmul_70 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_78 = [slice_82, full_4, full_18] + del slice_82 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_76 = paddle._C_ops.stack(combine_78, 0) + del combine_78 + + # pd_op.reshape: (-1x49x512xf32) <- (-1x49x16x32xf32, 3xi64) + reshape_176 = paddle._C_ops.reshape(transpose_77, stack_76) + del stack_76, transpose_77 + + # pd_op.matmul: (-1x49x512xf32) <- (-1x49x512xf32, 512x512xf32) + matmul_71 = paddle._C_ops.matmul(reshape_176, parameter_158, False, False) + del parameter_158, reshape_176 + + # pd_op.add: (-1x49x512xf32) <- (-1x49x512xf32, 512xf32) + add_87 = paddle._C_ops.add(matmul_71, parameter_157) + del matmul_71, parameter_157 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x49x512xf32, 4xi64) + reshape_177 = paddle._C_ops.reshape(add_87, full_int_array_38) + del add_87 + + # pd_op.reshape: (-1x2x2x7x7x512xf32) <- (-1x7x7x512xf32, 6xi64) + reshape_178 = paddle._C_ops.reshape(reshape_177, full_int_array_40) + del reshape_177 + + # pd_op.transpose: (-1x2x7x2x7x512xf32) <- (-1x2x2x7x7x512xf32) + transpose_78 = paddle._C_ops.transpose(reshape_178, [0, 1, 3, 2, 4, 5]) + del reshape_178 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x2x7x2x7x512xf32, 4xi64) + reshape_179 = paddle._C_ops.reshape(transpose_78, full_int_array_41) + del transpose_78 + + # pd_op.roll: (-1x14x14x512xf32) <- (-1x14x14x512xf32, 2xi64) + roll_11 = paddle._C_ops.roll(reshape_179, full_int_array_29, [1, 2]) + del reshape_179 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_79 = [slice_79, full_30, full_18] + del slice_79 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_77 = paddle._C_ops.stack(combine_79, 0) + del combine_79 + + # pd_op.reshape: (-1x196x512xf32) <- (-1x14x14x512xf32, 3xi64) + reshape_180 = paddle._C_ops.reshape(roll_11, stack_77) + del roll_11, stack_77 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_88 = paddle._C_ops.add(add_83, reshape_180) + del add_83, reshape_180 + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_78, layer_norm_79, layer_norm_80 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_88, parameter_156, parameter_155, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_155, parameter_156 + + # pd_op.matmul: (-1x196x2048xf32) <- (-1x196x512xf32, 512x2048xf32) + matmul_72 = paddle._C_ops.matmul(layer_norm_78, parameter_154, False, False) + del layer_norm_78, parameter_154 + + # pd_op.add: (-1x196x2048xf32) <- (-1x196x2048xf32, 2048xf32) + add_89 = paddle._C_ops.add(matmul_72, parameter_153) + del matmul_72, parameter_153 + + # pd_op.gelu: (-1x196x2048xf32) <- (-1x196x2048xf32) + gelu_11 = paddle._C_ops.gelu(add_89, False) + del add_89 + + # pd_op.matmul: (-1x196x512xf32) <- (-1x196x2048xf32, 2048x512xf32) + matmul_73 = paddle._C_ops.matmul(gelu_11, parameter_152, False, False) + del gelu_11, parameter_152 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, 512xf32) + add_90 = paddle._C_ops.add(matmul_73, parameter_151) + del matmul_73, parameter_151 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_91 = paddle._C_ops.add(add_88, add_90) + del add_88, add_90 + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_50 = paddle._C_ops.shape64(add_91) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_86 = paddle._C_ops.slice( + shape64_50, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_50 + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_81, layer_norm_82, layer_norm_83 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_91, parameter_150, parameter_149, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_149, parameter_150 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_80 = [slice_86, full_28, full_28, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_78 = paddle._C_ops.stack(combine_80, 0) + del combine_80 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x196x512xf32, 4xi64) + reshape_181 = paddle._C_ops.reshape(layer_norm_81, stack_78) + del layer_norm_81, stack_78 + + # pd_op.shape64: (4xi64) <- (-1x14x14x512xf32) + shape64_51 = paddle._C_ops.shape64(reshape_181) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_87 = paddle._C_ops.slice( + shape64_51, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_51 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_81 = [slice_87, full_29, full_3, full_29, full_3, full_18] + del slice_87 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_79 = paddle._C_ops.stack(combine_81, 0) + del combine_81 + + # pd_op.reshape: (-1x2x7x2x7x512xf32) <- (-1x14x14x512xf32, 6xi64) + reshape_182 = paddle._C_ops.reshape(reshape_181, stack_79) + del reshape_181, stack_79 + + # pd_op.transpose: (-1x2x2x7x7x512xf32) <- (-1x2x7x2x7x512xf32) + transpose_79 = paddle._C_ops.transpose(reshape_182, [0, 1, 3, 2, 4, 5]) + del reshape_182 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x2x2x7x7x512xf32, 4xi64) + reshape_183 = paddle._C_ops.reshape(transpose_79, full_int_array_38) + del transpose_79 + + # pd_op.reshape: (-1x49x512xf32) <- (-1x7x7x512xf32, 3xi64) + reshape_184 = paddle._C_ops.reshape(reshape_183, full_int_array_39) + del reshape_183 + + # pd_op.shape64: (3xi64) <- (-1x49x512xf32) + shape64_52 = paddle._C_ops.shape64(reshape_184) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_88 = paddle._C_ops.slice( + shape64_52, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_52 + + # pd_op.matmul: (-1x49x1536xf32) <- (-1x49x512xf32, 512x1536xf32) + matmul_74 = paddle._C_ops.matmul(reshape_184, parameter_148, False, False) + del parameter_148, reshape_184 + + # pd_op.add: (-1x49x1536xf32) <- (-1x49x1536xf32, 1536xf32) + add_92 = paddle._C_ops.add(matmul_74, parameter_147) + del matmul_74, parameter_147 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_82 = [slice_88, full_4, full_5, full_26, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_80 = paddle._C_ops.stack(combine_82, 0) + del combine_82 + + # pd_op.reshape: (-1x49x3x16x32xf32) <- (-1x49x1536xf32, 5xi64) + reshape_185 = paddle._C_ops.reshape(add_92, stack_80) + del add_92, stack_80 + + # pd_op.transpose: (3x-1x16x49x32xf32) <- (-1x49x3x16x32xf32) + transpose_80 = paddle._C_ops.transpose(reshape_185, [2, 0, 3, 1, 4]) + del reshape_185 + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_89 = paddle._C_ops.slice( + transpose_80, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_90 = paddle._C_ops.slice( + transpose_80, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_91 = paddle._C_ops.slice( + transpose_80, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_80 + + # pd_op.scale: (-1x16x49x32xf32) <- (-1x16x49x32xf32, 1xf32) + scale_12 = paddle._C_ops.scale(slice_89, full_8, float("0"), True) + del slice_89 + + # pd_op.transpose: (-1x16x32x49xf32) <- (-1x16x49x32xf32) + transpose_81 = paddle._C_ops.transpose(slice_90, [0, 1, 3, 2]) + del slice_90 + + # pd_op.matmul: (-1x16x49x49xf32) <- (-1x16x49x32xf32, -1x16x32x49xf32) + matmul_75 = paddle._C_ops.matmul(scale_12, transpose_81, False, False) + del scale_12, transpose_81 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_186 = paddle._C_ops.reshape(data_25, full_int_array_7) + del data_25 + + # pd_op.index_select: (2401x16xf32) <- (169x16xf32, 2401xi64) + index_select_12 = paddle._C_ops.index_select(data_26, reshape_186, 0) + del data_26, reshape_186 + + # pd_op.reshape: (49x49x16xf32) <- (2401x16xf32, 3xi64) + reshape_187 = paddle._C_ops.reshape(index_select_12, full_int_array_8) + del index_select_12 + + # pd_op.transpose: (16x49x49xf32) <- (49x49x16xf32) + transpose_82 = paddle._C_ops.transpose(reshape_187, [2, 0, 1]) + del reshape_187 + + # pd_op.unsqueeze: (1x16x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_36 = paddle._C_ops.unsqueeze(transpose_82, full_int_array_0) + del transpose_82 + + # pd_op.add: (-1x16x49x49xf32) <- (-1x16x49x49xf32, 1x16x49x49xf32) + add_93 = paddle._C_ops.add(matmul_75, unsqueeze_36) + del matmul_75, unsqueeze_36 + + # pd_op.softmax: (-1x16x49x49xf32) <- (-1x16x49x49xf32) + softmax_12 = paddle._C_ops.softmax(add_93, -1) + del add_93 + + # pd_op.matmul: (-1x16x49x32xf32) <- (-1x16x49x49xf32, -1x16x49x32xf32) + matmul_76 = paddle._C_ops.matmul(softmax_12, slice_91, False, False) + del slice_91, softmax_12 + + # pd_op.transpose: (-1x49x16x32xf32) <- (-1x16x49x32xf32) + transpose_83 = paddle._C_ops.transpose(matmul_76, [0, 2, 1, 3]) + del matmul_76 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_83 = [slice_88, full_4, full_18] + del slice_88 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_81 = paddle._C_ops.stack(combine_83, 0) + del combine_83 + + # pd_op.reshape: (-1x49x512xf32) <- (-1x49x16x32xf32, 3xi64) + reshape_188 = paddle._C_ops.reshape(transpose_83, stack_81) + del stack_81, transpose_83 + + # pd_op.matmul: (-1x49x512xf32) <- (-1x49x512xf32, 512x512xf32) + matmul_77 = paddle._C_ops.matmul(reshape_188, parameter_146, False, False) + del parameter_146, reshape_188 + + # pd_op.add: (-1x49x512xf32) <- (-1x49x512xf32, 512xf32) + add_94 = paddle._C_ops.add(matmul_77, parameter_145) + del matmul_77, parameter_145 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x49x512xf32, 4xi64) + reshape_189 = paddle._C_ops.reshape(add_94, full_int_array_38) + del add_94 + + # pd_op.reshape: (-1x2x2x7x7x512xf32) <- (-1x7x7x512xf32, 6xi64) + reshape_190 = paddle._C_ops.reshape(reshape_189, full_int_array_40) + del reshape_189 + + # pd_op.transpose: (-1x2x7x2x7x512xf32) <- (-1x2x2x7x7x512xf32) + transpose_84 = paddle._C_ops.transpose(reshape_190, [0, 1, 3, 2, 4, 5]) + del reshape_190 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x2x7x2x7x512xf32, 4xi64) + reshape_191 = paddle._C_ops.reshape(transpose_84, full_int_array_41) + del transpose_84 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_84 = [slice_86, full_30, full_18] + del slice_86 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_82 = paddle._C_ops.stack(combine_84, 0) + del combine_84 + + # pd_op.reshape: (-1x196x512xf32) <- (-1x14x14x512xf32, 3xi64) + reshape_192 = paddle._C_ops.reshape(reshape_191, stack_82) + del reshape_191, stack_82 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_95 = paddle._C_ops.add(add_91, reshape_192) + del add_91, reshape_192 + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_84, layer_norm_85, layer_norm_86 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_95, parameter_144, parameter_143, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_143, parameter_144 + + # pd_op.matmul: (-1x196x2048xf32) <- (-1x196x512xf32, 512x2048xf32) + matmul_78 = paddle._C_ops.matmul(layer_norm_84, parameter_142, False, False) + del layer_norm_84, parameter_142 + + # pd_op.add: (-1x196x2048xf32) <- (-1x196x2048xf32, 2048xf32) + add_96 = paddle._C_ops.add(matmul_78, parameter_141) + del matmul_78, parameter_141 + + # pd_op.gelu: (-1x196x2048xf32) <- (-1x196x2048xf32) + gelu_12 = paddle._C_ops.gelu(add_96, False) + del add_96 + + # pd_op.matmul: (-1x196x512xf32) <- (-1x196x2048xf32, 2048x512xf32) + matmul_79 = paddle._C_ops.matmul(gelu_12, parameter_140, False, False) + del gelu_12, parameter_140 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, 512xf32) + add_97 = paddle._C_ops.add(matmul_79, parameter_139) + del matmul_79, parameter_139 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_98 = paddle._C_ops.add(add_95, add_97) + del add_95, add_97 + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_53 = paddle._C_ops.shape64(add_98) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_92 = paddle._C_ops.slice( + shape64_53, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_53 + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_87, layer_norm_88, layer_norm_89 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_98, parameter_138, parameter_137, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_137, parameter_138 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_85 = [slice_92, full_28, full_28, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_83 = paddle._C_ops.stack(combine_85, 0) + del combine_85 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x196x512xf32, 4xi64) + reshape_193 = paddle._C_ops.reshape(layer_norm_87, stack_83) + del layer_norm_87, stack_83 + + # pd_op.shape64: (4xi64) <- (-1x14x14x512xf32) + shape64_54 = paddle._C_ops.shape64(reshape_193) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_93 = paddle._C_ops.slice( + shape64_54, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_54 + + # pd_op.roll: (-1x14x14x512xf32) <- (-1x14x14x512xf32, 2xi64) + roll_12 = paddle._C_ops.roll(reshape_193, full_int_array_11, [1, 2]) + del reshape_193 + + # pd_op.shape64: (4xi64) <- (-1x14x14x512xf32) + shape64_55 = paddle._C_ops.shape64(roll_12) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_94 = paddle._C_ops.slice( + shape64_55, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_55 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_86 = [slice_94, full_29, full_3, full_29, full_3, full_18] + del slice_94 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_84 = paddle._C_ops.stack(combine_86, 0) + del combine_86 + + # pd_op.reshape: (-1x2x7x2x7x512xf32) <- (-1x14x14x512xf32, 6xi64) + reshape_194 = paddle._C_ops.reshape(roll_12, stack_84) + del roll_12, stack_84 + + # pd_op.transpose: (-1x2x2x7x7x512xf32) <- (-1x2x7x2x7x512xf32) + transpose_85 = paddle._C_ops.transpose(reshape_194, [0, 1, 3, 2, 4, 5]) + del reshape_194 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x2x2x7x7x512xf32, 4xi64) + reshape_195 = paddle._C_ops.reshape(transpose_85, full_int_array_38) + del transpose_85 + + # pd_op.reshape: (-1x49x512xf32) <- (-1x7x7x512xf32, 3xi64) + reshape_196 = paddle._C_ops.reshape(reshape_195, full_int_array_39) + del reshape_195 + + # pd_op.full: (1x14x14x1xf32) <- () + full_38 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__60 = paddle._C_ops.set_value_( + full_38, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_38 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__61 = paddle._C_ops.set_value_( + set_value__60, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__60 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__62 = paddle._C_ops.set_value_( + set_value__61, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__61 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__63 = paddle._C_ops.set_value_( + set_value__62, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__62 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__64 = paddle._C_ops.set_value_( + set_value__63, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__63 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__65 = paddle._C_ops.set_value_( + set_value__64, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__64 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__66 = paddle._C_ops.set_value_( + set_value__65, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__65 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__67 = paddle._C_ops.set_value_( + set_value__66, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__66 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__6 = paddle._C_ops.set_value_( + set_value__67, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__67 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_197 = paddle._C_ops.reshape(set_value__6, full_int_array_42) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_86 = paddle._C_ops.transpose(reshape_197, [0, 1, 3, 2, 4, 5]) + del reshape_197 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_198 = paddle._C_ops.reshape(transpose_86, full_int_array_27) + del transpose_86 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_199 = paddle._C_ops.reshape(reshape_198, full_int_array_28) + del reshape_198 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_37 = paddle._C_ops.unsqueeze(reshape_199, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_38 = paddle._C_ops.unsqueeze(reshape_199, full_int_array_5) + del reshape_199 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_6 = paddle._C_ops.subtract(unsqueeze_37, unsqueeze_38) + del unsqueeze_37, unsqueeze_38 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_6 = paddle._C_ops.not_equal(subtract_6, full_11) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_12 = paddle._C_ops.where(not_equal_6, full_32, subtract_6) + del not_equal_6, subtract_6 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_6 = paddle._C_ops.equal(where_12, full_11) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_13 = paddle._C_ops.where(equal_6, full_33, where_12) + del equal_6, where_12 + + # pd_op.shape64: (3xi64) <- (-1x49x512xf32) + shape64_56 = paddle._C_ops.shape64(reshape_196) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_95 = paddle._C_ops.slice( + shape64_56, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_56 + + # pd_op.matmul: (-1x49x1536xf32) <- (-1x49x512xf32, 512x1536xf32) + matmul_80 = paddle._C_ops.matmul(reshape_196, parameter_136, False, False) + del parameter_136, reshape_196 + + # pd_op.add: (-1x49x1536xf32) <- (-1x49x1536xf32, 1536xf32) + add_99 = paddle._C_ops.add(matmul_80, parameter_135) + del matmul_80, parameter_135 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_87 = [slice_95, full_4, full_5, full_26, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_85 = paddle._C_ops.stack(combine_87, 0) + del combine_87 + + # pd_op.reshape: (-1x49x3x16x32xf32) <- (-1x49x1536xf32, 5xi64) + reshape_200 = paddle._C_ops.reshape(add_99, stack_85) + del add_99, stack_85 + + # pd_op.transpose: (3x-1x16x49x32xf32) <- (-1x49x3x16x32xf32) + transpose_87 = paddle._C_ops.transpose(reshape_200, [2, 0, 3, 1, 4]) + del reshape_200 + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_96 = paddle._C_ops.slice( + transpose_87, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_97 = paddle._C_ops.slice( + transpose_87, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_98 = paddle._C_ops.slice( + transpose_87, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_87 + + # pd_op.scale: (-1x16x49x32xf32) <- (-1x16x49x32xf32, 1xf32) + scale_13 = paddle._C_ops.scale(slice_96, full_8, float("0"), True) + del slice_96 + + # pd_op.transpose: (-1x16x32x49xf32) <- (-1x16x49x32xf32) + transpose_88 = paddle._C_ops.transpose(slice_97, [0, 1, 3, 2]) + del slice_97 + + # pd_op.matmul: (-1x16x49x49xf32) <- (-1x16x49x32xf32, -1x16x32x49xf32) + matmul_81 = paddle._C_ops.matmul(scale_13, transpose_88, False, False) + del scale_13, transpose_88 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_201 = paddle._C_ops.reshape(data_27, full_int_array_7) + del data_27 + + # pd_op.index_select: (2401x16xf32) <- (169x16xf32, 2401xi64) + index_select_13 = paddle._C_ops.index_select(data_28, reshape_201, 0) + del data_28, reshape_201 + + # pd_op.reshape: (49x49x16xf32) <- (2401x16xf32, 3xi64) + reshape_202 = paddle._C_ops.reshape(index_select_13, full_int_array_8) + del index_select_13 + + # pd_op.transpose: (16x49x49xf32) <- (49x49x16xf32) + transpose_89 = paddle._C_ops.transpose(reshape_202, [2, 0, 1]) + del reshape_202 + + # pd_op.unsqueeze: (1x16x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_39 = paddle._C_ops.unsqueeze(transpose_89, full_int_array_0) + del transpose_89 + + # pd_op.add: (-1x16x49x49xf32) <- (-1x16x49x49xf32, 1x16x49x49xf32) + add_100 = paddle._C_ops.add(matmul_81, unsqueeze_39) + del matmul_81, unsqueeze_39 + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_6 = paddle._C_ops.floor_divide(slice_95, full_34) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_88 = [floor_divide_6, full_6, full_26, full_4, full_4] + del floor_divide_6 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_86 = paddle._C_ops.stack(combine_88, 0) + del combine_88 + + # pd_op.reshape: (-1x4x16x49x49xf32) <- (-1x16x49x49xf32, 5xi64) + reshape_203 = paddle._C_ops.reshape(add_100, stack_86) + del add_100, stack_86 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_40 = paddle._C_ops.unsqueeze(where_13, full_int_array_1) + del where_13 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_41 = paddle._C_ops.unsqueeze(unsqueeze_40, full_int_array_0) + del unsqueeze_40 + + # pd_op.add: (-1x4x16x49x49xf32) <- (-1x4x16x49x49xf32, 1x4x1x49x49xf32) + add_101 = paddle._C_ops.add(reshape_203, unsqueeze_41) + del reshape_203, unsqueeze_41 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_89 = [slice_95, full_26, full_4, full_4] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_87 = paddle._C_ops.stack(combine_89, 0) + del combine_89 + + # pd_op.reshape: (-1x16x49x49xf32) <- (-1x4x16x49x49xf32, 4xi64) + reshape_204 = paddle._C_ops.reshape(add_101, stack_87) + del add_101, stack_87 + + # pd_op.softmax: (-1x16x49x49xf32) <- (-1x16x49x49xf32) + softmax_13 = paddle._C_ops.softmax(reshape_204, -1) + del reshape_204 + + # pd_op.matmul: (-1x16x49x32xf32) <- (-1x16x49x49xf32, -1x16x49x32xf32) + matmul_82 = paddle._C_ops.matmul(softmax_13, slice_98, False, False) + del slice_98, softmax_13 + + # pd_op.transpose: (-1x49x16x32xf32) <- (-1x16x49x32xf32) + transpose_90 = paddle._C_ops.transpose(matmul_82, [0, 2, 1, 3]) + del matmul_82 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_90 = [slice_95, full_4, full_18] + del slice_95 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_88 = paddle._C_ops.stack(combine_90, 0) + del combine_90 + + # pd_op.reshape: (-1x49x512xf32) <- (-1x49x16x32xf32, 3xi64) + reshape_205 = paddle._C_ops.reshape(transpose_90, stack_88) + del stack_88, transpose_90 + + # pd_op.matmul: (-1x49x512xf32) <- (-1x49x512xf32, 512x512xf32) + matmul_83 = paddle._C_ops.matmul(reshape_205, parameter_134, False, False) + del parameter_134, reshape_205 + + # pd_op.add: (-1x49x512xf32) <- (-1x49x512xf32, 512xf32) + add_102 = paddle._C_ops.add(matmul_83, parameter_133) + del matmul_83, parameter_133 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x49x512xf32, 4xi64) + reshape_206 = paddle._C_ops.reshape(add_102, full_int_array_38) + del add_102 + + # pd_op.reshape: (-1x2x2x7x7x512xf32) <- (-1x7x7x512xf32, 6xi64) + reshape_207 = paddle._C_ops.reshape(reshape_206, full_int_array_40) + del reshape_206 + + # pd_op.transpose: (-1x2x7x2x7x512xf32) <- (-1x2x2x7x7x512xf32) + transpose_91 = paddle._C_ops.transpose(reshape_207, [0, 1, 3, 2, 4, 5]) + del reshape_207 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x2x7x2x7x512xf32, 4xi64) + reshape_208 = paddle._C_ops.reshape(transpose_91, full_int_array_41) + del transpose_91 + + # pd_op.roll: (-1x14x14x512xf32) <- (-1x14x14x512xf32, 2xi64) + roll_13 = paddle._C_ops.roll(reshape_208, full_int_array_29, [1, 2]) + del reshape_208 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_91 = [slice_92, full_30, full_18] + del slice_92 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_89 = paddle._C_ops.stack(combine_91, 0) + del combine_91 + + # pd_op.reshape: (-1x196x512xf32) <- (-1x14x14x512xf32, 3xi64) + reshape_209 = paddle._C_ops.reshape(roll_13, stack_89) + del roll_13, stack_89 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_103 = paddle._C_ops.add(add_98, reshape_209) + del add_98, reshape_209 + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_90, layer_norm_91, layer_norm_92 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_103, parameter_132, parameter_131, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_131, parameter_132 + + # pd_op.matmul: (-1x196x2048xf32) <- (-1x196x512xf32, 512x2048xf32) + matmul_84 = paddle._C_ops.matmul(layer_norm_90, parameter_130, False, False) + del layer_norm_90, parameter_130 + + # pd_op.add: (-1x196x2048xf32) <- (-1x196x2048xf32, 2048xf32) + add_104 = paddle._C_ops.add(matmul_84, parameter_129) + del matmul_84, parameter_129 + + # pd_op.gelu: (-1x196x2048xf32) <- (-1x196x2048xf32) + gelu_13 = paddle._C_ops.gelu(add_104, False) + del add_104 + + # pd_op.matmul: (-1x196x512xf32) <- (-1x196x2048xf32, 2048x512xf32) + matmul_85 = paddle._C_ops.matmul(gelu_13, parameter_128, False, False) + del gelu_13, parameter_128 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, 512xf32) + add_105 = paddle._C_ops.add(matmul_85, parameter_127) + del matmul_85, parameter_127 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_106 = paddle._C_ops.add(add_103, add_105) + del add_103, add_105 + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_57 = paddle._C_ops.shape64(add_106) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_99 = paddle._C_ops.slice( + shape64_57, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_57 + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_93, layer_norm_94, layer_norm_95 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_106, parameter_126, parameter_125, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_125, parameter_126 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_92 = [slice_99, full_28, full_28, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_90 = paddle._C_ops.stack(combine_92, 0) + del combine_92 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x196x512xf32, 4xi64) + reshape_210 = paddle._C_ops.reshape(layer_norm_93, stack_90) + del layer_norm_93, stack_90 + + # pd_op.shape64: (4xi64) <- (-1x14x14x512xf32) + shape64_58 = paddle._C_ops.shape64(reshape_210) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_100 = paddle._C_ops.slice( + shape64_58, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_58 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_93 = [slice_100, full_29, full_3, full_29, full_3, full_18] + del slice_100 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_91 = paddle._C_ops.stack(combine_93, 0) + del combine_93 + + # pd_op.reshape: (-1x2x7x2x7x512xf32) <- (-1x14x14x512xf32, 6xi64) + reshape_211 = paddle._C_ops.reshape(reshape_210, stack_91) + del reshape_210, stack_91 + + # pd_op.transpose: (-1x2x2x7x7x512xf32) <- (-1x2x7x2x7x512xf32) + transpose_92 = paddle._C_ops.transpose(reshape_211, [0, 1, 3, 2, 4, 5]) + del reshape_211 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x2x2x7x7x512xf32, 4xi64) + reshape_212 = paddle._C_ops.reshape(transpose_92, full_int_array_38) + del transpose_92 + + # pd_op.reshape: (-1x49x512xf32) <- (-1x7x7x512xf32, 3xi64) + reshape_213 = paddle._C_ops.reshape(reshape_212, full_int_array_39) + del reshape_212 + + # pd_op.shape64: (3xi64) <- (-1x49x512xf32) + shape64_59 = paddle._C_ops.shape64(reshape_213) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_101 = paddle._C_ops.slice( + shape64_59, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_59 + + # pd_op.matmul: (-1x49x1536xf32) <- (-1x49x512xf32, 512x1536xf32) + matmul_86 = paddle._C_ops.matmul(reshape_213, parameter_124, False, False) + del parameter_124, reshape_213 + + # pd_op.add: (-1x49x1536xf32) <- (-1x49x1536xf32, 1536xf32) + add_107 = paddle._C_ops.add(matmul_86, parameter_123) + del matmul_86, parameter_123 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_94 = [slice_101, full_4, full_5, full_26, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_92 = paddle._C_ops.stack(combine_94, 0) + del combine_94 + + # pd_op.reshape: (-1x49x3x16x32xf32) <- (-1x49x1536xf32, 5xi64) + reshape_214 = paddle._C_ops.reshape(add_107, stack_92) + del add_107, stack_92 + + # pd_op.transpose: (3x-1x16x49x32xf32) <- (-1x49x3x16x32xf32) + transpose_93 = paddle._C_ops.transpose(reshape_214, [2, 0, 3, 1, 4]) + del reshape_214 + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_102 = paddle._C_ops.slice( + transpose_93, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_103 = paddle._C_ops.slice( + transpose_93, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_104 = paddle._C_ops.slice( + transpose_93, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_93 + + # pd_op.scale: (-1x16x49x32xf32) <- (-1x16x49x32xf32, 1xf32) + scale_14 = paddle._C_ops.scale(slice_102, full_8, float("0"), True) + del slice_102 + + # pd_op.transpose: (-1x16x32x49xf32) <- (-1x16x49x32xf32) + transpose_94 = paddle._C_ops.transpose(slice_103, [0, 1, 3, 2]) + del slice_103 + + # pd_op.matmul: (-1x16x49x49xf32) <- (-1x16x49x32xf32, -1x16x32x49xf32) + matmul_87 = paddle._C_ops.matmul(scale_14, transpose_94, False, False) + del scale_14, transpose_94 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_215 = paddle._C_ops.reshape(data_29, full_int_array_7) + del data_29 + + # pd_op.index_select: (2401x16xf32) <- (169x16xf32, 2401xi64) + index_select_14 = paddle._C_ops.index_select(data_30, reshape_215, 0) + del data_30, reshape_215 + + # pd_op.reshape: (49x49x16xf32) <- (2401x16xf32, 3xi64) + reshape_216 = paddle._C_ops.reshape(index_select_14, full_int_array_8) + del index_select_14 + + # pd_op.transpose: (16x49x49xf32) <- (49x49x16xf32) + transpose_95 = paddle._C_ops.transpose(reshape_216, [2, 0, 1]) + del reshape_216 + + # pd_op.unsqueeze: (1x16x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_42 = paddle._C_ops.unsqueeze(transpose_95, full_int_array_0) + del transpose_95 + + # pd_op.add: (-1x16x49x49xf32) <- (-1x16x49x49xf32, 1x16x49x49xf32) + add_108 = paddle._C_ops.add(matmul_87, unsqueeze_42) + del matmul_87, unsqueeze_42 + + # pd_op.softmax: (-1x16x49x49xf32) <- (-1x16x49x49xf32) + softmax_14 = paddle._C_ops.softmax(add_108, -1) + del add_108 + + # pd_op.matmul: (-1x16x49x32xf32) <- (-1x16x49x49xf32, -1x16x49x32xf32) + matmul_88 = paddle._C_ops.matmul(softmax_14, slice_104, False, False) + del slice_104, softmax_14 + + # pd_op.transpose: (-1x49x16x32xf32) <- (-1x16x49x32xf32) + transpose_96 = paddle._C_ops.transpose(matmul_88, [0, 2, 1, 3]) + del matmul_88 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_95 = [slice_101, full_4, full_18] + del slice_101 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_93 = paddle._C_ops.stack(combine_95, 0) + del combine_95 + + # pd_op.reshape: (-1x49x512xf32) <- (-1x49x16x32xf32, 3xi64) + reshape_217 = paddle._C_ops.reshape(transpose_96, stack_93) + del stack_93, transpose_96 + + # pd_op.matmul: (-1x49x512xf32) <- (-1x49x512xf32, 512x512xf32) + matmul_89 = paddle._C_ops.matmul(reshape_217, parameter_122, False, False) + del parameter_122, reshape_217 + + # pd_op.add: (-1x49x512xf32) <- (-1x49x512xf32, 512xf32) + add_109 = paddle._C_ops.add(matmul_89, parameter_121) + del matmul_89, parameter_121 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x49x512xf32, 4xi64) + reshape_218 = paddle._C_ops.reshape(add_109, full_int_array_38) + del add_109 + + # pd_op.reshape: (-1x2x2x7x7x512xf32) <- (-1x7x7x512xf32, 6xi64) + reshape_219 = paddle._C_ops.reshape(reshape_218, full_int_array_40) + del reshape_218 + + # pd_op.transpose: (-1x2x7x2x7x512xf32) <- (-1x2x2x7x7x512xf32) + transpose_97 = paddle._C_ops.transpose(reshape_219, [0, 1, 3, 2, 4, 5]) + del reshape_219 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x2x7x2x7x512xf32, 4xi64) + reshape_220 = paddle._C_ops.reshape(transpose_97, full_int_array_41) + del transpose_97 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_96 = [slice_99, full_30, full_18] + del slice_99 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_94 = paddle._C_ops.stack(combine_96, 0) + del combine_96 + + # pd_op.reshape: (-1x196x512xf32) <- (-1x14x14x512xf32, 3xi64) + reshape_221 = paddle._C_ops.reshape(reshape_220, stack_94) + del reshape_220, stack_94 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_110 = paddle._C_ops.add(add_106, reshape_221) + del add_106, reshape_221 + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_96, layer_norm_97, layer_norm_98 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_110, parameter_120, parameter_119, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_119, parameter_120 + + # pd_op.matmul: (-1x196x2048xf32) <- (-1x196x512xf32, 512x2048xf32) + matmul_90 = paddle._C_ops.matmul(layer_norm_96, parameter_118, False, False) + del layer_norm_96, parameter_118 + + # pd_op.add: (-1x196x2048xf32) <- (-1x196x2048xf32, 2048xf32) + add_111 = paddle._C_ops.add(matmul_90, parameter_117) + del matmul_90, parameter_117 + + # pd_op.gelu: (-1x196x2048xf32) <- (-1x196x2048xf32) + gelu_14 = paddle._C_ops.gelu(add_111, False) + del add_111 + + # pd_op.matmul: (-1x196x512xf32) <- (-1x196x2048xf32, 2048x512xf32) + matmul_91 = paddle._C_ops.matmul(gelu_14, parameter_116, False, False) + del gelu_14, parameter_116 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, 512xf32) + add_112 = paddle._C_ops.add(matmul_91, parameter_115) + del matmul_91, parameter_115 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_113 = paddle._C_ops.add(add_110, add_112) + del add_110, add_112 + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_60 = paddle._C_ops.shape64(add_113) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_105 = paddle._C_ops.slice( + shape64_60, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_60 + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_99, layer_norm_100, layer_norm_101 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_113, parameter_114, parameter_113, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_113, parameter_114 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_97 = [slice_105, full_28, full_28, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_95 = paddle._C_ops.stack(combine_97, 0) + del combine_97 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x196x512xf32, 4xi64) + reshape_222 = paddle._C_ops.reshape(layer_norm_99, stack_95) + del layer_norm_99, stack_95 + + # pd_op.shape64: (4xi64) <- (-1x14x14x512xf32) + shape64_61 = paddle._C_ops.shape64(reshape_222) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_106 = paddle._C_ops.slice( + shape64_61, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_61 + + # pd_op.roll: (-1x14x14x512xf32) <- (-1x14x14x512xf32, 2xi64) + roll_14 = paddle._C_ops.roll(reshape_222, full_int_array_11, [1, 2]) + del reshape_222 + + # pd_op.shape64: (4xi64) <- (-1x14x14x512xf32) + shape64_62 = paddle._C_ops.shape64(roll_14) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_107 = paddle._C_ops.slice( + shape64_62, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_62 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_98 = [slice_107, full_29, full_3, full_29, full_3, full_18] + del slice_107 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_96 = paddle._C_ops.stack(combine_98, 0) + del combine_98 + + # pd_op.reshape: (-1x2x7x2x7x512xf32) <- (-1x14x14x512xf32, 6xi64) + reshape_223 = paddle._C_ops.reshape(roll_14, stack_96) + del roll_14, stack_96 + + # pd_op.transpose: (-1x2x2x7x7x512xf32) <- (-1x2x7x2x7x512xf32) + transpose_98 = paddle._C_ops.transpose(reshape_223, [0, 1, 3, 2, 4, 5]) + del reshape_223 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x2x2x7x7x512xf32, 4xi64) + reshape_224 = paddle._C_ops.reshape(transpose_98, full_int_array_38) + del transpose_98 + + # pd_op.reshape: (-1x49x512xf32) <- (-1x7x7x512xf32, 3xi64) + reshape_225 = paddle._C_ops.reshape(reshape_224, full_int_array_39) + del reshape_224 + + # pd_op.full: (1x14x14x1xf32) <- () + full_39 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__68 = paddle._C_ops.set_value_( + full_39, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_39 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__69 = paddle._C_ops.set_value_( + set_value__68, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__68 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__70 = paddle._C_ops.set_value_( + set_value__69, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__69 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__71 = paddle._C_ops.set_value_( + set_value__70, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__70 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__72 = paddle._C_ops.set_value_( + set_value__71, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__71 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__73 = paddle._C_ops.set_value_( + set_value__72, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__72 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__74 = paddle._C_ops.set_value_( + set_value__73, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__73 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__75 = paddle._C_ops.set_value_( + set_value__74, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__74 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__7 = paddle._C_ops.set_value_( + set_value__75, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__75 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_226 = paddle._C_ops.reshape(set_value__7, full_int_array_42) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_99 = paddle._C_ops.transpose(reshape_226, [0, 1, 3, 2, 4, 5]) + del reshape_226 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_227 = paddle._C_ops.reshape(transpose_99, full_int_array_27) + del transpose_99 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_228 = paddle._C_ops.reshape(reshape_227, full_int_array_28) + del reshape_227 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_43 = paddle._C_ops.unsqueeze(reshape_228, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_44 = paddle._C_ops.unsqueeze(reshape_228, full_int_array_5) + del reshape_228 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_7 = paddle._C_ops.subtract(unsqueeze_43, unsqueeze_44) + del unsqueeze_43, unsqueeze_44 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_7 = paddle._C_ops.not_equal(subtract_7, full_11) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_14 = paddle._C_ops.where(not_equal_7, full_32, subtract_7) + del not_equal_7, subtract_7 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_7 = paddle._C_ops.equal(where_14, full_11) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_15 = paddle._C_ops.where(equal_7, full_33, where_14) + del equal_7, where_14 + + # pd_op.shape64: (3xi64) <- (-1x49x512xf32) + shape64_63 = paddle._C_ops.shape64(reshape_225) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_108 = paddle._C_ops.slice( + shape64_63, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_63 + + # pd_op.matmul: (-1x49x1536xf32) <- (-1x49x512xf32, 512x1536xf32) + matmul_92 = paddle._C_ops.matmul(reshape_225, parameter_112, False, False) + del parameter_112, reshape_225 + + # pd_op.add: (-1x49x1536xf32) <- (-1x49x1536xf32, 1536xf32) + add_114 = paddle._C_ops.add(matmul_92, parameter_111) + del matmul_92, parameter_111 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_99 = [slice_108, full_4, full_5, full_26, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_97 = paddle._C_ops.stack(combine_99, 0) + del combine_99 + + # pd_op.reshape: (-1x49x3x16x32xf32) <- (-1x49x1536xf32, 5xi64) + reshape_229 = paddle._C_ops.reshape(add_114, stack_97) + del add_114, stack_97 + + # pd_op.transpose: (3x-1x16x49x32xf32) <- (-1x49x3x16x32xf32) + transpose_100 = paddle._C_ops.transpose(reshape_229, [2, 0, 3, 1, 4]) + del reshape_229 + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_109 = paddle._C_ops.slice( + transpose_100, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_110 = paddle._C_ops.slice( + transpose_100, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_111 = paddle._C_ops.slice( + transpose_100, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_100 + + # pd_op.scale: (-1x16x49x32xf32) <- (-1x16x49x32xf32, 1xf32) + scale_15 = paddle._C_ops.scale(slice_109, full_8, float("0"), True) + del slice_109 + + # pd_op.transpose: (-1x16x32x49xf32) <- (-1x16x49x32xf32) + transpose_101 = paddle._C_ops.transpose(slice_110, [0, 1, 3, 2]) + del slice_110 + + # pd_op.matmul: (-1x16x49x49xf32) <- (-1x16x49x32xf32, -1x16x32x49xf32) + matmul_93 = paddle._C_ops.matmul(scale_15, transpose_101, False, False) + del scale_15, transpose_101 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_230 = paddle._C_ops.reshape(data_31, full_int_array_7) + del data_31 + + # pd_op.index_select: (2401x16xf32) <- (169x16xf32, 2401xi64) + index_select_15 = paddle._C_ops.index_select(data_32, reshape_230, 0) + del data_32, reshape_230 + + # pd_op.reshape: (49x49x16xf32) <- (2401x16xf32, 3xi64) + reshape_231 = paddle._C_ops.reshape(index_select_15, full_int_array_8) + del index_select_15 + + # pd_op.transpose: (16x49x49xf32) <- (49x49x16xf32) + transpose_102 = paddle._C_ops.transpose(reshape_231, [2, 0, 1]) + del reshape_231 + + # pd_op.unsqueeze: (1x16x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_45 = paddle._C_ops.unsqueeze(transpose_102, full_int_array_0) + del transpose_102 + + # pd_op.add: (-1x16x49x49xf32) <- (-1x16x49x49xf32, 1x16x49x49xf32) + add_115 = paddle._C_ops.add(matmul_93, unsqueeze_45) + del matmul_93, unsqueeze_45 + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_7 = paddle._C_ops.floor_divide(slice_108, full_34) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_100 = [floor_divide_7, full_6, full_26, full_4, full_4] + del floor_divide_7 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_98 = paddle._C_ops.stack(combine_100, 0) + del combine_100 + + # pd_op.reshape: (-1x4x16x49x49xf32) <- (-1x16x49x49xf32, 5xi64) + reshape_232 = paddle._C_ops.reshape(add_115, stack_98) + del add_115, stack_98 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_46 = paddle._C_ops.unsqueeze(where_15, full_int_array_1) + del where_15 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_47 = paddle._C_ops.unsqueeze(unsqueeze_46, full_int_array_0) + del unsqueeze_46 + + # pd_op.add: (-1x4x16x49x49xf32) <- (-1x4x16x49x49xf32, 1x4x1x49x49xf32) + add_116 = paddle._C_ops.add(reshape_232, unsqueeze_47) + del reshape_232, unsqueeze_47 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_101 = [slice_108, full_26, full_4, full_4] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_99 = paddle._C_ops.stack(combine_101, 0) + del combine_101 + + # pd_op.reshape: (-1x16x49x49xf32) <- (-1x4x16x49x49xf32, 4xi64) + reshape_233 = paddle._C_ops.reshape(add_116, stack_99) + del add_116, stack_99 + + # pd_op.softmax: (-1x16x49x49xf32) <- (-1x16x49x49xf32) + softmax_15 = paddle._C_ops.softmax(reshape_233, -1) + del reshape_233 + + # pd_op.matmul: (-1x16x49x32xf32) <- (-1x16x49x49xf32, -1x16x49x32xf32) + matmul_94 = paddle._C_ops.matmul(softmax_15, slice_111, False, False) + del slice_111, softmax_15 + + # pd_op.transpose: (-1x49x16x32xf32) <- (-1x16x49x32xf32) + transpose_103 = paddle._C_ops.transpose(matmul_94, [0, 2, 1, 3]) + del matmul_94 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_102 = [slice_108, full_4, full_18] + del slice_108 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_100 = paddle._C_ops.stack(combine_102, 0) + del combine_102 + + # pd_op.reshape: (-1x49x512xf32) <- (-1x49x16x32xf32, 3xi64) + reshape_234 = paddle._C_ops.reshape(transpose_103, stack_100) + del stack_100, transpose_103 + + # pd_op.matmul: (-1x49x512xf32) <- (-1x49x512xf32, 512x512xf32) + matmul_95 = paddle._C_ops.matmul(reshape_234, parameter_110, False, False) + del parameter_110, reshape_234 + + # pd_op.add: (-1x49x512xf32) <- (-1x49x512xf32, 512xf32) + add_117 = paddle._C_ops.add(matmul_95, parameter_109) + del matmul_95, parameter_109 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x49x512xf32, 4xi64) + reshape_235 = paddle._C_ops.reshape(add_117, full_int_array_38) + del add_117 + + # pd_op.reshape: (-1x2x2x7x7x512xf32) <- (-1x7x7x512xf32, 6xi64) + reshape_236 = paddle._C_ops.reshape(reshape_235, full_int_array_40) + del reshape_235 + + # pd_op.transpose: (-1x2x7x2x7x512xf32) <- (-1x2x2x7x7x512xf32) + transpose_104 = paddle._C_ops.transpose(reshape_236, [0, 1, 3, 2, 4, 5]) + del reshape_236 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x2x7x2x7x512xf32, 4xi64) + reshape_237 = paddle._C_ops.reshape(transpose_104, full_int_array_41) + del transpose_104 + + # pd_op.roll: (-1x14x14x512xf32) <- (-1x14x14x512xf32, 2xi64) + roll_15 = paddle._C_ops.roll(reshape_237, full_int_array_29, [1, 2]) + del reshape_237 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_103 = [slice_105, full_30, full_18] + del slice_105 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_101 = paddle._C_ops.stack(combine_103, 0) + del combine_103 + + # pd_op.reshape: (-1x196x512xf32) <- (-1x14x14x512xf32, 3xi64) + reshape_238 = paddle._C_ops.reshape(roll_15, stack_101) + del roll_15, stack_101 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_118 = paddle._C_ops.add(add_113, reshape_238) + del add_113, reshape_238 + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_102, layer_norm_103, layer_norm_104 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_118, parameter_108, parameter_107, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_107, parameter_108 + + # pd_op.matmul: (-1x196x2048xf32) <- (-1x196x512xf32, 512x2048xf32) + matmul_96 = paddle._C_ops.matmul(layer_norm_102, parameter_106, False, False) + del layer_norm_102, parameter_106 + + # pd_op.add: (-1x196x2048xf32) <- (-1x196x2048xf32, 2048xf32) + add_119 = paddle._C_ops.add(matmul_96, parameter_105) + del matmul_96, parameter_105 + + # pd_op.gelu: (-1x196x2048xf32) <- (-1x196x2048xf32) + gelu_15 = paddle._C_ops.gelu(add_119, False) + del add_119 + + # pd_op.matmul: (-1x196x512xf32) <- (-1x196x2048xf32, 2048x512xf32) + matmul_97 = paddle._C_ops.matmul(gelu_15, parameter_104, False, False) + del gelu_15, parameter_104 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, 512xf32) + add_120 = paddle._C_ops.add(matmul_97, parameter_103) + del matmul_97, parameter_103 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_121 = paddle._C_ops.add(add_118, add_120) + del add_118, add_120 + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_64 = paddle._C_ops.shape64(add_121) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_112 = paddle._C_ops.slice( + shape64_64, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_64 + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_105, layer_norm_106, layer_norm_107 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_121, parameter_102, parameter_101, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_101, parameter_102 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_104 = [slice_112, full_28, full_28, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_102 = paddle._C_ops.stack(combine_104, 0) + del combine_104 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x196x512xf32, 4xi64) + reshape_239 = paddle._C_ops.reshape(layer_norm_105, stack_102) + del layer_norm_105, stack_102 + + # pd_op.shape64: (4xi64) <- (-1x14x14x512xf32) + shape64_65 = paddle._C_ops.shape64(reshape_239) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_113 = paddle._C_ops.slice( + shape64_65, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_65 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_105 = [slice_113, full_29, full_3, full_29, full_3, full_18] + del slice_113 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_103 = paddle._C_ops.stack(combine_105, 0) + del combine_105 + + # pd_op.reshape: (-1x2x7x2x7x512xf32) <- (-1x14x14x512xf32, 6xi64) + reshape_240 = paddle._C_ops.reshape(reshape_239, stack_103) + del reshape_239, stack_103 + + # pd_op.transpose: (-1x2x2x7x7x512xf32) <- (-1x2x7x2x7x512xf32) + transpose_105 = paddle._C_ops.transpose(reshape_240, [0, 1, 3, 2, 4, 5]) + del reshape_240 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x2x2x7x7x512xf32, 4xi64) + reshape_241 = paddle._C_ops.reshape(transpose_105, full_int_array_38) + del transpose_105 + + # pd_op.reshape: (-1x49x512xf32) <- (-1x7x7x512xf32, 3xi64) + reshape_242 = paddle._C_ops.reshape(reshape_241, full_int_array_39) + del reshape_241 + + # pd_op.shape64: (3xi64) <- (-1x49x512xf32) + shape64_66 = paddle._C_ops.shape64(reshape_242) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_114 = paddle._C_ops.slice( + shape64_66, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_66 + + # pd_op.matmul: (-1x49x1536xf32) <- (-1x49x512xf32, 512x1536xf32) + matmul_98 = paddle._C_ops.matmul(reshape_242, parameter_100, False, False) + del parameter_100, reshape_242 + + # pd_op.add: (-1x49x1536xf32) <- (-1x49x1536xf32, 1536xf32) + add_122 = paddle._C_ops.add(matmul_98, parameter_99) + del matmul_98, parameter_99 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_106 = [slice_114, full_4, full_5, full_26, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_104 = paddle._C_ops.stack(combine_106, 0) + del combine_106 + + # pd_op.reshape: (-1x49x3x16x32xf32) <- (-1x49x1536xf32, 5xi64) + reshape_243 = paddle._C_ops.reshape(add_122, stack_104) + del add_122, stack_104 + + # pd_op.transpose: (3x-1x16x49x32xf32) <- (-1x49x3x16x32xf32) + transpose_106 = paddle._C_ops.transpose(reshape_243, [2, 0, 3, 1, 4]) + del reshape_243 + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_115 = paddle._C_ops.slice( + transpose_106, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_116 = paddle._C_ops.slice( + transpose_106, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_117 = paddle._C_ops.slice( + transpose_106, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_106 + + # pd_op.scale: (-1x16x49x32xf32) <- (-1x16x49x32xf32, 1xf32) + scale_16 = paddle._C_ops.scale(slice_115, full_8, float("0"), True) + del slice_115 + + # pd_op.transpose: (-1x16x32x49xf32) <- (-1x16x49x32xf32) + transpose_107 = paddle._C_ops.transpose(slice_116, [0, 1, 3, 2]) + del slice_116 + + # pd_op.matmul: (-1x16x49x49xf32) <- (-1x16x49x32xf32, -1x16x32x49xf32) + matmul_99 = paddle._C_ops.matmul(scale_16, transpose_107, False, False) + del scale_16, transpose_107 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_244 = paddle._C_ops.reshape(data_33, full_int_array_7) + del data_33 + + # pd_op.index_select: (2401x16xf32) <- (169x16xf32, 2401xi64) + index_select_16 = paddle._C_ops.index_select(data_34, reshape_244, 0) + del data_34, reshape_244 + + # pd_op.reshape: (49x49x16xf32) <- (2401x16xf32, 3xi64) + reshape_245 = paddle._C_ops.reshape(index_select_16, full_int_array_8) + del index_select_16 + + # pd_op.transpose: (16x49x49xf32) <- (49x49x16xf32) + transpose_108 = paddle._C_ops.transpose(reshape_245, [2, 0, 1]) + del reshape_245 + + # pd_op.unsqueeze: (1x16x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_48 = paddle._C_ops.unsqueeze(transpose_108, full_int_array_0) + del transpose_108 + + # pd_op.add: (-1x16x49x49xf32) <- (-1x16x49x49xf32, 1x16x49x49xf32) + add_123 = paddle._C_ops.add(matmul_99, unsqueeze_48) + del matmul_99, unsqueeze_48 + + # pd_op.softmax: (-1x16x49x49xf32) <- (-1x16x49x49xf32) + softmax_16 = paddle._C_ops.softmax(add_123, -1) + del add_123 + + # pd_op.matmul: (-1x16x49x32xf32) <- (-1x16x49x49xf32, -1x16x49x32xf32) + matmul_100 = paddle._C_ops.matmul(softmax_16, slice_117, False, False) + del slice_117, softmax_16 + + # pd_op.transpose: (-1x49x16x32xf32) <- (-1x16x49x32xf32) + transpose_109 = paddle._C_ops.transpose(matmul_100, [0, 2, 1, 3]) + del matmul_100 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_107 = [slice_114, full_4, full_18] + del slice_114 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_105 = paddle._C_ops.stack(combine_107, 0) + del combine_107 + + # pd_op.reshape: (-1x49x512xf32) <- (-1x49x16x32xf32, 3xi64) + reshape_246 = paddle._C_ops.reshape(transpose_109, stack_105) + del stack_105, transpose_109 + + # pd_op.matmul: (-1x49x512xf32) <- (-1x49x512xf32, 512x512xf32) + matmul_101 = paddle._C_ops.matmul(reshape_246, parameter_98, False, False) + del parameter_98, reshape_246 + + # pd_op.add: (-1x49x512xf32) <- (-1x49x512xf32, 512xf32) + add_124 = paddle._C_ops.add(matmul_101, parameter_97) + del matmul_101, parameter_97 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x49x512xf32, 4xi64) + reshape_247 = paddle._C_ops.reshape(add_124, full_int_array_38) + del add_124 + + # pd_op.reshape: (-1x2x2x7x7x512xf32) <- (-1x7x7x512xf32, 6xi64) + reshape_248 = paddle._C_ops.reshape(reshape_247, full_int_array_40) + del reshape_247 + + # pd_op.transpose: (-1x2x7x2x7x512xf32) <- (-1x2x2x7x7x512xf32) + transpose_110 = paddle._C_ops.transpose(reshape_248, [0, 1, 3, 2, 4, 5]) + del reshape_248 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x2x7x2x7x512xf32, 4xi64) + reshape_249 = paddle._C_ops.reshape(transpose_110, full_int_array_41) + del transpose_110 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_108 = [slice_112, full_30, full_18] + del slice_112 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_106 = paddle._C_ops.stack(combine_108, 0) + del combine_108 + + # pd_op.reshape: (-1x196x512xf32) <- (-1x14x14x512xf32, 3xi64) + reshape_250 = paddle._C_ops.reshape(reshape_249, stack_106) + del reshape_249, stack_106 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_125 = paddle._C_ops.add(add_121, reshape_250) + del add_121, reshape_250 + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_108, layer_norm_109, layer_norm_110 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_125, parameter_96, parameter_95, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_95, parameter_96 + + # pd_op.matmul: (-1x196x2048xf32) <- (-1x196x512xf32, 512x2048xf32) + matmul_102 = paddle._C_ops.matmul(layer_norm_108, parameter_94, False, False) + del layer_norm_108, parameter_94 + + # pd_op.add: (-1x196x2048xf32) <- (-1x196x2048xf32, 2048xf32) + add_126 = paddle._C_ops.add(matmul_102, parameter_93) + del matmul_102, parameter_93 + + # pd_op.gelu: (-1x196x2048xf32) <- (-1x196x2048xf32) + gelu_16 = paddle._C_ops.gelu(add_126, False) + del add_126 + + # pd_op.matmul: (-1x196x512xf32) <- (-1x196x2048xf32, 2048x512xf32) + matmul_103 = paddle._C_ops.matmul(gelu_16, parameter_92, False, False) + del gelu_16, parameter_92 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, 512xf32) + add_127 = paddle._C_ops.add(matmul_103, parameter_91) + del matmul_103, parameter_91 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_128 = paddle._C_ops.add(add_125, add_127) + del add_125, add_127 + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_67 = paddle._C_ops.shape64(add_128) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_118 = paddle._C_ops.slice( + shape64_67, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_67 + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_111, layer_norm_112, layer_norm_113 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_128, parameter_90, parameter_89, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_89, parameter_90 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_109 = [slice_118, full_28, full_28, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_107 = paddle._C_ops.stack(combine_109, 0) + del combine_109 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x196x512xf32, 4xi64) + reshape_251 = paddle._C_ops.reshape(layer_norm_111, stack_107) + del layer_norm_111, stack_107 + + # pd_op.shape64: (4xi64) <- (-1x14x14x512xf32) + shape64_68 = paddle._C_ops.shape64(reshape_251) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_119 = paddle._C_ops.slice( + shape64_68, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_68 + + # pd_op.roll: (-1x14x14x512xf32) <- (-1x14x14x512xf32, 2xi64) + roll_16 = paddle._C_ops.roll(reshape_251, full_int_array_11, [1, 2]) + del reshape_251 + + # pd_op.shape64: (4xi64) <- (-1x14x14x512xf32) + shape64_69 = paddle._C_ops.shape64(roll_16) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_120 = paddle._C_ops.slice( + shape64_69, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_69 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_110 = [slice_120, full_29, full_3, full_29, full_3, full_18] + del slice_120 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_108 = paddle._C_ops.stack(combine_110, 0) + del combine_110 + + # pd_op.reshape: (-1x2x7x2x7x512xf32) <- (-1x14x14x512xf32, 6xi64) + reshape_252 = paddle._C_ops.reshape(roll_16, stack_108) + del roll_16, stack_108 + + # pd_op.transpose: (-1x2x2x7x7x512xf32) <- (-1x2x7x2x7x512xf32) + transpose_111 = paddle._C_ops.transpose(reshape_252, [0, 1, 3, 2, 4, 5]) + del reshape_252 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x2x2x7x7x512xf32, 4xi64) + reshape_253 = paddle._C_ops.reshape(transpose_111, full_int_array_38) + del transpose_111 + + # pd_op.reshape: (-1x49x512xf32) <- (-1x7x7x512xf32, 3xi64) + reshape_254 = paddle._C_ops.reshape(reshape_253, full_int_array_39) + del reshape_253 + + # pd_op.full: (1x14x14x1xf32) <- () + full_40 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__76 = paddle._C_ops.set_value_( + full_40, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_40 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__77 = paddle._C_ops.set_value_( + set_value__76, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__76 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__78 = paddle._C_ops.set_value_( + set_value__77, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__77 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__79 = paddle._C_ops.set_value_( + set_value__78, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__78 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__80 = paddle._C_ops.set_value_( + set_value__79, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__79 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__81 = paddle._C_ops.set_value_( + set_value__80, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__80 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__82 = paddle._C_ops.set_value_( + set_value__81, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__81 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__83 = paddle._C_ops.set_value_( + set_value__82, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__82 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__8 = paddle._C_ops.set_value_( + set_value__83, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__83 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_255 = paddle._C_ops.reshape(set_value__8, full_int_array_42) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_112 = paddle._C_ops.transpose(reshape_255, [0, 1, 3, 2, 4, 5]) + del reshape_255 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_256 = paddle._C_ops.reshape(transpose_112, full_int_array_27) + del transpose_112 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_257 = paddle._C_ops.reshape(reshape_256, full_int_array_28) + del reshape_256 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_49 = paddle._C_ops.unsqueeze(reshape_257, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_50 = paddle._C_ops.unsqueeze(reshape_257, full_int_array_5) + del reshape_257 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_8 = paddle._C_ops.subtract(unsqueeze_49, unsqueeze_50) + del unsqueeze_49, unsqueeze_50 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_8 = paddle._C_ops.not_equal(subtract_8, full_11) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_16 = paddle._C_ops.where(not_equal_8, full_32, subtract_8) + del not_equal_8, subtract_8 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_8 = paddle._C_ops.equal(where_16, full_11) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_17 = paddle._C_ops.where(equal_8, full_33, where_16) + del equal_8, where_16 + + # pd_op.shape64: (3xi64) <- (-1x49x512xf32) + shape64_70 = paddle._C_ops.shape64(reshape_254) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_121 = paddle._C_ops.slice( + shape64_70, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_70 + + # pd_op.matmul: (-1x49x1536xf32) <- (-1x49x512xf32, 512x1536xf32) + matmul_104 = paddle._C_ops.matmul(reshape_254, parameter_88, False, False) + del parameter_88, reshape_254 + + # pd_op.add: (-1x49x1536xf32) <- (-1x49x1536xf32, 1536xf32) + add_129 = paddle._C_ops.add(matmul_104, parameter_87) + del matmul_104, parameter_87 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_111 = [slice_121, full_4, full_5, full_26, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_109 = paddle._C_ops.stack(combine_111, 0) + del combine_111 + + # pd_op.reshape: (-1x49x3x16x32xf32) <- (-1x49x1536xf32, 5xi64) + reshape_258 = paddle._C_ops.reshape(add_129, stack_109) + del add_129, stack_109 + + # pd_op.transpose: (3x-1x16x49x32xf32) <- (-1x49x3x16x32xf32) + transpose_113 = paddle._C_ops.transpose(reshape_258, [2, 0, 3, 1, 4]) + del reshape_258 + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_122 = paddle._C_ops.slice( + transpose_113, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_123 = paddle._C_ops.slice( + transpose_113, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_124 = paddle._C_ops.slice( + transpose_113, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_113 + + # pd_op.scale: (-1x16x49x32xf32) <- (-1x16x49x32xf32, 1xf32) + scale_17 = paddle._C_ops.scale(slice_122, full_8, float("0"), True) + del slice_122 + + # pd_op.transpose: (-1x16x32x49xf32) <- (-1x16x49x32xf32) + transpose_114 = paddle._C_ops.transpose(slice_123, [0, 1, 3, 2]) + del slice_123 + + # pd_op.matmul: (-1x16x49x49xf32) <- (-1x16x49x32xf32, -1x16x32x49xf32) + matmul_105 = paddle._C_ops.matmul(scale_17, transpose_114, False, False) + del scale_17, transpose_114 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_259 = paddle._C_ops.reshape(data_35, full_int_array_7) + del data_35 + + # pd_op.index_select: (2401x16xf32) <- (169x16xf32, 2401xi64) + index_select_17 = paddle._C_ops.index_select(data_36, reshape_259, 0) + del data_36, reshape_259 + + # pd_op.reshape: (49x49x16xf32) <- (2401x16xf32, 3xi64) + reshape_260 = paddle._C_ops.reshape(index_select_17, full_int_array_8) + del index_select_17 + + # pd_op.transpose: (16x49x49xf32) <- (49x49x16xf32) + transpose_115 = paddle._C_ops.transpose(reshape_260, [2, 0, 1]) + del reshape_260 + + # pd_op.unsqueeze: (1x16x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_51 = paddle._C_ops.unsqueeze(transpose_115, full_int_array_0) + del transpose_115 + + # pd_op.add: (-1x16x49x49xf32) <- (-1x16x49x49xf32, 1x16x49x49xf32) + add_130 = paddle._C_ops.add(matmul_105, unsqueeze_51) + del matmul_105, unsqueeze_51 + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_8 = paddle._C_ops.floor_divide(slice_121, full_34) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_112 = [floor_divide_8, full_6, full_26, full_4, full_4] + del floor_divide_8 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_110 = paddle._C_ops.stack(combine_112, 0) + del combine_112 + + # pd_op.reshape: (-1x4x16x49x49xf32) <- (-1x16x49x49xf32, 5xi64) + reshape_261 = paddle._C_ops.reshape(add_130, stack_110) + del add_130, stack_110 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_52 = paddle._C_ops.unsqueeze(where_17, full_int_array_1) + del where_17 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_53 = paddle._C_ops.unsqueeze(unsqueeze_52, full_int_array_0) + del unsqueeze_52 + + # pd_op.add: (-1x4x16x49x49xf32) <- (-1x4x16x49x49xf32, 1x4x1x49x49xf32) + add_131 = paddle._C_ops.add(reshape_261, unsqueeze_53) + del reshape_261, unsqueeze_53 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_113 = [slice_121, full_26, full_4, full_4] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_111 = paddle._C_ops.stack(combine_113, 0) + del combine_113 + + # pd_op.reshape: (-1x16x49x49xf32) <- (-1x4x16x49x49xf32, 4xi64) + reshape_262 = paddle._C_ops.reshape(add_131, stack_111) + del add_131, stack_111 + + # pd_op.softmax: (-1x16x49x49xf32) <- (-1x16x49x49xf32) + softmax_17 = paddle._C_ops.softmax(reshape_262, -1) + del reshape_262 + + # pd_op.matmul: (-1x16x49x32xf32) <- (-1x16x49x49xf32, -1x16x49x32xf32) + matmul_106 = paddle._C_ops.matmul(softmax_17, slice_124, False, False) + del slice_124, softmax_17 + + # pd_op.transpose: (-1x49x16x32xf32) <- (-1x16x49x32xf32) + transpose_116 = paddle._C_ops.transpose(matmul_106, [0, 2, 1, 3]) + del matmul_106 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_114 = [slice_121, full_4, full_18] + del slice_121 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_112 = paddle._C_ops.stack(combine_114, 0) + del combine_114 + + # pd_op.reshape: (-1x49x512xf32) <- (-1x49x16x32xf32, 3xi64) + reshape_263 = paddle._C_ops.reshape(transpose_116, stack_112) + del stack_112, transpose_116 + + # pd_op.matmul: (-1x49x512xf32) <- (-1x49x512xf32, 512x512xf32) + matmul_107 = paddle._C_ops.matmul(reshape_263, parameter_86, False, False) + del parameter_86, reshape_263 + + # pd_op.add: (-1x49x512xf32) <- (-1x49x512xf32, 512xf32) + add_132 = paddle._C_ops.add(matmul_107, parameter_85) + del matmul_107, parameter_85 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x49x512xf32, 4xi64) + reshape_264 = paddle._C_ops.reshape(add_132, full_int_array_38) + del add_132 + + # pd_op.reshape: (-1x2x2x7x7x512xf32) <- (-1x7x7x512xf32, 6xi64) + reshape_265 = paddle._C_ops.reshape(reshape_264, full_int_array_40) + del reshape_264 + + # pd_op.transpose: (-1x2x7x2x7x512xf32) <- (-1x2x2x7x7x512xf32) + transpose_117 = paddle._C_ops.transpose(reshape_265, [0, 1, 3, 2, 4, 5]) + del reshape_265 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x2x7x2x7x512xf32, 4xi64) + reshape_266 = paddle._C_ops.reshape(transpose_117, full_int_array_41) + del transpose_117 + + # pd_op.roll: (-1x14x14x512xf32) <- (-1x14x14x512xf32, 2xi64) + roll_17 = paddle._C_ops.roll(reshape_266, full_int_array_29, [1, 2]) + del reshape_266 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_115 = [slice_118, full_30, full_18] + del slice_118 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_113 = paddle._C_ops.stack(combine_115, 0) + del combine_115 + + # pd_op.reshape: (-1x196x512xf32) <- (-1x14x14x512xf32, 3xi64) + reshape_267 = paddle._C_ops.reshape(roll_17, stack_113) + del roll_17, stack_113 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_133 = paddle._C_ops.add(add_128, reshape_267) + del add_128, reshape_267 + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_114, layer_norm_115, layer_norm_116 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_133, parameter_84, parameter_83, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_83, parameter_84 + + # pd_op.matmul: (-1x196x2048xf32) <- (-1x196x512xf32, 512x2048xf32) + matmul_108 = paddle._C_ops.matmul(layer_norm_114, parameter_82, False, False) + del layer_norm_114, parameter_82 + + # pd_op.add: (-1x196x2048xf32) <- (-1x196x2048xf32, 2048xf32) + add_134 = paddle._C_ops.add(matmul_108, parameter_81) + del matmul_108, parameter_81 + + # pd_op.gelu: (-1x196x2048xf32) <- (-1x196x2048xf32) + gelu_17 = paddle._C_ops.gelu(add_134, False) + del add_134 + + # pd_op.matmul: (-1x196x512xf32) <- (-1x196x2048xf32, 2048x512xf32) + matmul_109 = paddle._C_ops.matmul(gelu_17, parameter_80, False, False) + del gelu_17, parameter_80 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, 512xf32) + add_135 = paddle._C_ops.add(matmul_109, parameter_79) + del matmul_109, parameter_79 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_136 = paddle._C_ops.add(add_133, add_135) + del add_133, add_135 + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_71 = paddle._C_ops.shape64(add_136) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_125 = paddle._C_ops.slice( + shape64_71, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_71 + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_117, layer_norm_118, layer_norm_119 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_136, parameter_78, parameter_77, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_77, parameter_78 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_116 = [slice_125, full_28, full_28, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_114 = paddle._C_ops.stack(combine_116, 0) + del combine_116 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x196x512xf32, 4xi64) + reshape_268 = paddle._C_ops.reshape(layer_norm_117, stack_114) + del layer_norm_117, stack_114 + + # pd_op.shape64: (4xi64) <- (-1x14x14x512xf32) + shape64_72 = paddle._C_ops.shape64(reshape_268) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_126 = paddle._C_ops.slice( + shape64_72, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_72 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_117 = [slice_126, full_29, full_3, full_29, full_3, full_18] + del slice_126 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_115 = paddle._C_ops.stack(combine_117, 0) + del combine_117 + + # pd_op.reshape: (-1x2x7x2x7x512xf32) <- (-1x14x14x512xf32, 6xi64) + reshape_269 = paddle._C_ops.reshape(reshape_268, stack_115) + del reshape_268, stack_115 + + # pd_op.transpose: (-1x2x2x7x7x512xf32) <- (-1x2x7x2x7x512xf32) + transpose_118 = paddle._C_ops.transpose(reshape_269, [0, 1, 3, 2, 4, 5]) + del reshape_269 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x2x2x7x7x512xf32, 4xi64) + reshape_270 = paddle._C_ops.reshape(transpose_118, full_int_array_38) + del transpose_118 + + # pd_op.reshape: (-1x49x512xf32) <- (-1x7x7x512xf32, 3xi64) + reshape_271 = paddle._C_ops.reshape(reshape_270, full_int_array_39) + del reshape_270 + + # pd_op.shape64: (3xi64) <- (-1x49x512xf32) + shape64_73 = paddle._C_ops.shape64(reshape_271) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_127 = paddle._C_ops.slice( + shape64_73, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_73 + + # pd_op.matmul: (-1x49x1536xf32) <- (-1x49x512xf32, 512x1536xf32) + matmul_110 = paddle._C_ops.matmul(reshape_271, parameter_76, False, False) + del parameter_76, reshape_271 + + # pd_op.add: (-1x49x1536xf32) <- (-1x49x1536xf32, 1536xf32) + add_137 = paddle._C_ops.add(matmul_110, parameter_75) + del matmul_110, parameter_75 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_118 = [slice_127, full_4, full_5, full_26, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_116 = paddle._C_ops.stack(combine_118, 0) + del combine_118 + + # pd_op.reshape: (-1x49x3x16x32xf32) <- (-1x49x1536xf32, 5xi64) + reshape_272 = paddle._C_ops.reshape(add_137, stack_116) + del add_137, stack_116 + + # pd_op.transpose: (3x-1x16x49x32xf32) <- (-1x49x3x16x32xf32) + transpose_119 = paddle._C_ops.transpose(reshape_272, [2, 0, 3, 1, 4]) + del reshape_272 + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_128 = paddle._C_ops.slice( + transpose_119, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_129 = paddle._C_ops.slice( + transpose_119, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_130 = paddle._C_ops.slice( + transpose_119, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_119 + + # pd_op.scale: (-1x16x49x32xf32) <- (-1x16x49x32xf32, 1xf32) + scale_18 = paddle._C_ops.scale(slice_128, full_8, float("0"), True) + del slice_128 + + # pd_op.transpose: (-1x16x32x49xf32) <- (-1x16x49x32xf32) + transpose_120 = paddle._C_ops.transpose(slice_129, [0, 1, 3, 2]) + del slice_129 + + # pd_op.matmul: (-1x16x49x49xf32) <- (-1x16x49x32xf32, -1x16x32x49xf32) + matmul_111 = paddle._C_ops.matmul(scale_18, transpose_120, False, False) + del scale_18, transpose_120 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_273 = paddle._C_ops.reshape(data_37, full_int_array_7) + del data_37 + + # pd_op.index_select: (2401x16xf32) <- (169x16xf32, 2401xi64) + index_select_18 = paddle._C_ops.index_select(data_38, reshape_273, 0) + del data_38, reshape_273 + + # pd_op.reshape: (49x49x16xf32) <- (2401x16xf32, 3xi64) + reshape_274 = paddle._C_ops.reshape(index_select_18, full_int_array_8) + del index_select_18 + + # pd_op.transpose: (16x49x49xf32) <- (49x49x16xf32) + transpose_121 = paddle._C_ops.transpose(reshape_274, [2, 0, 1]) + del reshape_274 + + # pd_op.unsqueeze: (1x16x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_54 = paddle._C_ops.unsqueeze(transpose_121, full_int_array_0) + del transpose_121 + + # pd_op.add: (-1x16x49x49xf32) <- (-1x16x49x49xf32, 1x16x49x49xf32) + add_138 = paddle._C_ops.add(matmul_111, unsqueeze_54) + del matmul_111, unsqueeze_54 + + # pd_op.softmax: (-1x16x49x49xf32) <- (-1x16x49x49xf32) + softmax_18 = paddle._C_ops.softmax(add_138, -1) + del add_138 + + # pd_op.matmul: (-1x16x49x32xf32) <- (-1x16x49x49xf32, -1x16x49x32xf32) + matmul_112 = paddle._C_ops.matmul(softmax_18, slice_130, False, False) + del slice_130, softmax_18 + + # pd_op.transpose: (-1x49x16x32xf32) <- (-1x16x49x32xf32) + transpose_122 = paddle._C_ops.transpose(matmul_112, [0, 2, 1, 3]) + del matmul_112 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_119 = [slice_127, full_4, full_18] + del slice_127 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_117 = paddle._C_ops.stack(combine_119, 0) + del combine_119 + + # pd_op.reshape: (-1x49x512xf32) <- (-1x49x16x32xf32, 3xi64) + reshape_275 = paddle._C_ops.reshape(transpose_122, stack_117) + del stack_117, transpose_122 + + # pd_op.matmul: (-1x49x512xf32) <- (-1x49x512xf32, 512x512xf32) + matmul_113 = paddle._C_ops.matmul(reshape_275, parameter_74, False, False) + del parameter_74, reshape_275 + + # pd_op.add: (-1x49x512xf32) <- (-1x49x512xf32, 512xf32) + add_139 = paddle._C_ops.add(matmul_113, parameter_73) + del matmul_113, parameter_73 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x49x512xf32, 4xi64) + reshape_276 = paddle._C_ops.reshape(add_139, full_int_array_38) + del add_139 + + # pd_op.reshape: (-1x2x2x7x7x512xf32) <- (-1x7x7x512xf32, 6xi64) + reshape_277 = paddle._C_ops.reshape(reshape_276, full_int_array_40) + del reshape_276 + + # pd_op.transpose: (-1x2x7x2x7x512xf32) <- (-1x2x2x7x7x512xf32) + transpose_123 = paddle._C_ops.transpose(reshape_277, [0, 1, 3, 2, 4, 5]) + del reshape_277 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x2x7x2x7x512xf32, 4xi64) + reshape_278 = paddle._C_ops.reshape(transpose_123, full_int_array_41) + del transpose_123 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_120 = [slice_125, full_30, full_18] + del slice_125 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_118 = paddle._C_ops.stack(combine_120, 0) + del combine_120 + + # pd_op.reshape: (-1x196x512xf32) <- (-1x14x14x512xf32, 3xi64) + reshape_279 = paddle._C_ops.reshape(reshape_278, stack_118) + del reshape_278, stack_118 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_140 = paddle._C_ops.add(add_136, reshape_279) + del add_136, reshape_279 + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_120, layer_norm_121, layer_norm_122 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_140, parameter_72, parameter_71, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_71, parameter_72 + + # pd_op.matmul: (-1x196x2048xf32) <- (-1x196x512xf32, 512x2048xf32) + matmul_114 = paddle._C_ops.matmul(layer_norm_120, parameter_70, False, False) + del layer_norm_120, parameter_70 + + # pd_op.add: (-1x196x2048xf32) <- (-1x196x2048xf32, 2048xf32) + add_141 = paddle._C_ops.add(matmul_114, parameter_69) + del matmul_114, parameter_69 + + # pd_op.gelu: (-1x196x2048xf32) <- (-1x196x2048xf32) + gelu_18 = paddle._C_ops.gelu(add_141, False) + del add_141 + + # pd_op.matmul: (-1x196x512xf32) <- (-1x196x2048xf32, 2048x512xf32) + matmul_115 = paddle._C_ops.matmul(gelu_18, parameter_68, False, False) + del gelu_18, parameter_68 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, 512xf32) + add_142 = paddle._C_ops.add(matmul_115, parameter_67) + del matmul_115, parameter_67 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_143 = paddle._C_ops.add(add_140, add_142) + del add_140, add_142 + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_74 = paddle._C_ops.shape64(add_143) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_131 = paddle._C_ops.slice( + shape64_74, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_74 + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_123, layer_norm_124, layer_norm_125 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_143, parameter_66, parameter_65, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_65, parameter_66 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_121 = [slice_131, full_28, full_28, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_119 = paddle._C_ops.stack(combine_121, 0) + del combine_121 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x196x512xf32, 4xi64) + reshape_280 = paddle._C_ops.reshape(layer_norm_123, stack_119) + del layer_norm_123, stack_119 + + # pd_op.shape64: (4xi64) <- (-1x14x14x512xf32) + shape64_75 = paddle._C_ops.shape64(reshape_280) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_132 = paddle._C_ops.slice( + shape64_75, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_75 + + # pd_op.roll: (-1x14x14x512xf32) <- (-1x14x14x512xf32, 2xi64) + roll_18 = paddle._C_ops.roll(reshape_280, full_int_array_11, [1, 2]) + del reshape_280 + + # pd_op.shape64: (4xi64) <- (-1x14x14x512xf32) + shape64_76 = paddle._C_ops.shape64(roll_18) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_133 = paddle._C_ops.slice( + shape64_76, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_76 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_122 = [slice_133, full_29, full_3, full_29, full_3, full_18] + del slice_133 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_120 = paddle._C_ops.stack(combine_122, 0) + del combine_122 + + # pd_op.reshape: (-1x2x7x2x7x512xf32) <- (-1x14x14x512xf32, 6xi64) + reshape_281 = paddle._C_ops.reshape(roll_18, stack_120) + del roll_18, stack_120 + + # pd_op.transpose: (-1x2x2x7x7x512xf32) <- (-1x2x7x2x7x512xf32) + transpose_124 = paddle._C_ops.transpose(reshape_281, [0, 1, 3, 2, 4, 5]) + del reshape_281 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x2x2x7x7x512xf32, 4xi64) + reshape_282 = paddle._C_ops.reshape(transpose_124, full_int_array_38) + del transpose_124 + + # pd_op.reshape: (-1x49x512xf32) <- (-1x7x7x512xf32, 3xi64) + reshape_283 = paddle._C_ops.reshape(reshape_282, full_int_array_39) + del reshape_282 + + # pd_op.full: (1x14x14x1xf32) <- () + full_41 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__84 = paddle._C_ops.set_value_( + full_41, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_41 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__85 = paddle._C_ops.set_value_( + set_value__84, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__84 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__86 = paddle._C_ops.set_value_( + set_value__85, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__85 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__87 = paddle._C_ops.set_value_( + set_value__86, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__86 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__88 = paddle._C_ops.set_value_( + set_value__87, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__87 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__89 = paddle._C_ops.set_value_( + set_value__88, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__88 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__90 = paddle._C_ops.set_value_( + set_value__89, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__89 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__91 = paddle._C_ops.set_value_( + set_value__90, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__90 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__9 = paddle._C_ops.set_value_( + set_value__91, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__91 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_284 = paddle._C_ops.reshape(set_value__9, full_int_array_42) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_125 = paddle._C_ops.transpose(reshape_284, [0, 1, 3, 2, 4, 5]) + del reshape_284 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_285 = paddle._C_ops.reshape(transpose_125, full_int_array_27) + del transpose_125 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_286 = paddle._C_ops.reshape(reshape_285, full_int_array_28) + del reshape_285 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_55 = paddle._C_ops.unsqueeze(reshape_286, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_56 = paddle._C_ops.unsqueeze(reshape_286, full_int_array_5) + del reshape_286 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_9 = paddle._C_ops.subtract(unsqueeze_55, unsqueeze_56) + del unsqueeze_55, unsqueeze_56 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_9 = paddle._C_ops.not_equal(subtract_9, full_11) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_18 = paddle._C_ops.where(not_equal_9, full_32, subtract_9) + del not_equal_9, subtract_9 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_9 = paddle._C_ops.equal(where_18, full_11) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_19 = paddle._C_ops.where(equal_9, full_33, where_18) + del equal_9, where_18 + + # pd_op.shape64: (3xi64) <- (-1x49x512xf32) + shape64_77 = paddle._C_ops.shape64(reshape_283) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_134 = paddle._C_ops.slice( + shape64_77, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_77 + + # pd_op.matmul: (-1x49x1536xf32) <- (-1x49x512xf32, 512x1536xf32) + matmul_116 = paddle._C_ops.matmul(reshape_283, parameter_64, False, False) + del parameter_64, reshape_283 + + # pd_op.add: (-1x49x1536xf32) <- (-1x49x1536xf32, 1536xf32) + add_144 = paddle._C_ops.add(matmul_116, parameter_63) + del matmul_116, parameter_63 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_123 = [slice_134, full_4, full_5, full_26, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_121 = paddle._C_ops.stack(combine_123, 0) + del combine_123 + + # pd_op.reshape: (-1x49x3x16x32xf32) <- (-1x49x1536xf32, 5xi64) + reshape_287 = paddle._C_ops.reshape(add_144, stack_121) + del add_144, stack_121 + + # pd_op.transpose: (3x-1x16x49x32xf32) <- (-1x49x3x16x32xf32) + transpose_126 = paddle._C_ops.transpose(reshape_287, [2, 0, 3, 1, 4]) + del reshape_287 + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_135 = paddle._C_ops.slice( + transpose_126, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_136 = paddle._C_ops.slice( + transpose_126, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_137 = paddle._C_ops.slice( + transpose_126, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_126 + + # pd_op.scale: (-1x16x49x32xf32) <- (-1x16x49x32xf32, 1xf32) + scale_19 = paddle._C_ops.scale(slice_135, full_8, float("0"), True) + del slice_135 + + # pd_op.transpose: (-1x16x32x49xf32) <- (-1x16x49x32xf32) + transpose_127 = paddle._C_ops.transpose(slice_136, [0, 1, 3, 2]) + del slice_136 + + # pd_op.matmul: (-1x16x49x49xf32) <- (-1x16x49x32xf32, -1x16x32x49xf32) + matmul_117 = paddle._C_ops.matmul(scale_19, transpose_127, False, False) + del scale_19, transpose_127 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_288 = paddle._C_ops.reshape(data_39, full_int_array_7) + del data_39 + + # pd_op.index_select: (2401x16xf32) <- (169x16xf32, 2401xi64) + index_select_19 = paddle._C_ops.index_select(data_40, reshape_288, 0) + del data_40, reshape_288 + + # pd_op.reshape: (49x49x16xf32) <- (2401x16xf32, 3xi64) + reshape_289 = paddle._C_ops.reshape(index_select_19, full_int_array_8) + del index_select_19 + + # pd_op.transpose: (16x49x49xf32) <- (49x49x16xf32) + transpose_128 = paddle._C_ops.transpose(reshape_289, [2, 0, 1]) + del reshape_289 + + # pd_op.unsqueeze: (1x16x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_57 = paddle._C_ops.unsqueeze(transpose_128, full_int_array_0) + del transpose_128 + + # pd_op.add: (-1x16x49x49xf32) <- (-1x16x49x49xf32, 1x16x49x49xf32) + add_145 = paddle._C_ops.add(matmul_117, unsqueeze_57) + del matmul_117, unsqueeze_57 + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_9 = paddle._C_ops.floor_divide(slice_134, full_34) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_124 = [floor_divide_9, full_6, full_26, full_4, full_4] + del floor_divide_9 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_122 = paddle._C_ops.stack(combine_124, 0) + del combine_124 + + # pd_op.reshape: (-1x4x16x49x49xf32) <- (-1x16x49x49xf32, 5xi64) + reshape_290 = paddle._C_ops.reshape(add_145, stack_122) + del add_145, stack_122 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_58 = paddle._C_ops.unsqueeze(where_19, full_int_array_1) + del where_19 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_59 = paddle._C_ops.unsqueeze(unsqueeze_58, full_int_array_0) + del unsqueeze_58 + + # pd_op.add: (-1x4x16x49x49xf32) <- (-1x4x16x49x49xf32, 1x4x1x49x49xf32) + add_146 = paddle._C_ops.add(reshape_290, unsqueeze_59) + del reshape_290, unsqueeze_59 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_125 = [slice_134, full_26, full_4, full_4] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_123 = paddle._C_ops.stack(combine_125, 0) + del combine_125 + + # pd_op.reshape: (-1x16x49x49xf32) <- (-1x4x16x49x49xf32, 4xi64) + reshape_291 = paddle._C_ops.reshape(add_146, stack_123) + del add_146, stack_123 + + # pd_op.softmax: (-1x16x49x49xf32) <- (-1x16x49x49xf32) + softmax_19 = paddle._C_ops.softmax(reshape_291, -1) + del reshape_291 + + # pd_op.matmul: (-1x16x49x32xf32) <- (-1x16x49x49xf32, -1x16x49x32xf32) + matmul_118 = paddle._C_ops.matmul(softmax_19, slice_137, False, False) + del slice_137, softmax_19 + + # pd_op.transpose: (-1x49x16x32xf32) <- (-1x16x49x32xf32) + transpose_129 = paddle._C_ops.transpose(matmul_118, [0, 2, 1, 3]) + del matmul_118 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_126 = [slice_134, full_4, full_18] + del slice_134 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_124 = paddle._C_ops.stack(combine_126, 0) + del combine_126 + + # pd_op.reshape: (-1x49x512xf32) <- (-1x49x16x32xf32, 3xi64) + reshape_292 = paddle._C_ops.reshape(transpose_129, stack_124) + del stack_124, transpose_129 + + # pd_op.matmul: (-1x49x512xf32) <- (-1x49x512xf32, 512x512xf32) + matmul_119 = paddle._C_ops.matmul(reshape_292, parameter_62, False, False) + del parameter_62, reshape_292 + + # pd_op.add: (-1x49x512xf32) <- (-1x49x512xf32, 512xf32) + add_147 = paddle._C_ops.add(matmul_119, parameter_61) + del matmul_119, parameter_61 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x49x512xf32, 4xi64) + reshape_293 = paddle._C_ops.reshape(add_147, full_int_array_38) + del add_147 + + # pd_op.reshape: (-1x2x2x7x7x512xf32) <- (-1x7x7x512xf32, 6xi64) + reshape_294 = paddle._C_ops.reshape(reshape_293, full_int_array_40) + del reshape_293 + + # pd_op.transpose: (-1x2x7x2x7x512xf32) <- (-1x2x2x7x7x512xf32) + transpose_130 = paddle._C_ops.transpose(reshape_294, [0, 1, 3, 2, 4, 5]) + del reshape_294 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x2x7x2x7x512xf32, 4xi64) + reshape_295 = paddle._C_ops.reshape(transpose_130, full_int_array_41) + del transpose_130 + + # pd_op.roll: (-1x14x14x512xf32) <- (-1x14x14x512xf32, 2xi64) + roll_19 = paddle._C_ops.roll(reshape_295, full_int_array_29, [1, 2]) + del reshape_295 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_127 = [slice_131, full_30, full_18] + del slice_131 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_125 = paddle._C_ops.stack(combine_127, 0) + del combine_127 + + # pd_op.reshape: (-1x196x512xf32) <- (-1x14x14x512xf32, 3xi64) + reshape_296 = paddle._C_ops.reshape(roll_19, stack_125) + del roll_19, stack_125 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_148 = paddle._C_ops.add(add_143, reshape_296) + del add_143, reshape_296 + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_126, layer_norm_127, layer_norm_128 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_148, parameter_60, parameter_59, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_59, parameter_60 + + # pd_op.matmul: (-1x196x2048xf32) <- (-1x196x512xf32, 512x2048xf32) + matmul_120 = paddle._C_ops.matmul(layer_norm_126, parameter_58, False, False) + del layer_norm_126, parameter_58 + + # pd_op.add: (-1x196x2048xf32) <- (-1x196x2048xf32, 2048xf32) + add_149 = paddle._C_ops.add(matmul_120, parameter_57) + del matmul_120, parameter_57 + + # pd_op.gelu: (-1x196x2048xf32) <- (-1x196x2048xf32) + gelu_19 = paddle._C_ops.gelu(add_149, False) + del add_149 + + # pd_op.matmul: (-1x196x512xf32) <- (-1x196x2048xf32, 2048x512xf32) + matmul_121 = paddle._C_ops.matmul(gelu_19, parameter_56, False, False) + del gelu_19, parameter_56 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, 512xf32) + add_150 = paddle._C_ops.add(matmul_121, parameter_55) + del matmul_121, parameter_55 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_151 = paddle._C_ops.add(add_148, add_150) + del add_148, add_150 + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_78 = paddle._C_ops.shape64(add_151) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_138 = paddle._C_ops.slice( + shape64_78, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_78 + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_129, layer_norm_130, layer_norm_131 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_151, parameter_54, parameter_53, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_53, parameter_54 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_128 = [slice_138, full_28, full_28, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_126 = paddle._C_ops.stack(combine_128, 0) + del combine_128 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x196x512xf32, 4xi64) + reshape_297 = paddle._C_ops.reshape(layer_norm_129, stack_126) + del layer_norm_129, stack_126 + + # pd_op.shape64: (4xi64) <- (-1x14x14x512xf32) + shape64_79 = paddle._C_ops.shape64(reshape_297) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_139 = paddle._C_ops.slice( + shape64_79, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_79 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_129 = [slice_139, full_29, full_3, full_29, full_3, full_18] + del slice_139 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_127 = paddle._C_ops.stack(combine_129, 0) + del combine_129 + + # pd_op.reshape: (-1x2x7x2x7x512xf32) <- (-1x14x14x512xf32, 6xi64) + reshape_298 = paddle._C_ops.reshape(reshape_297, stack_127) + del reshape_297, stack_127 + + # pd_op.transpose: (-1x2x2x7x7x512xf32) <- (-1x2x7x2x7x512xf32) + transpose_131 = paddle._C_ops.transpose(reshape_298, [0, 1, 3, 2, 4, 5]) + del reshape_298 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x2x2x7x7x512xf32, 4xi64) + reshape_299 = paddle._C_ops.reshape(transpose_131, full_int_array_38) + del transpose_131 + + # pd_op.reshape: (-1x49x512xf32) <- (-1x7x7x512xf32, 3xi64) + reshape_300 = paddle._C_ops.reshape(reshape_299, full_int_array_39) + del reshape_299 + + # pd_op.shape64: (3xi64) <- (-1x49x512xf32) + shape64_80 = paddle._C_ops.shape64(reshape_300) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_140 = paddle._C_ops.slice( + shape64_80, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_80 + + # pd_op.matmul: (-1x49x1536xf32) <- (-1x49x512xf32, 512x1536xf32) + matmul_122 = paddle._C_ops.matmul(reshape_300, parameter_52, False, False) + del parameter_52, reshape_300 + + # pd_op.add: (-1x49x1536xf32) <- (-1x49x1536xf32, 1536xf32) + add_152 = paddle._C_ops.add(matmul_122, parameter_51) + del matmul_122, parameter_51 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_130 = [slice_140, full_4, full_5, full_26, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_128 = paddle._C_ops.stack(combine_130, 0) + del combine_130 + + # pd_op.reshape: (-1x49x3x16x32xf32) <- (-1x49x1536xf32, 5xi64) + reshape_301 = paddle._C_ops.reshape(add_152, stack_128) + del add_152, stack_128 + + # pd_op.transpose: (3x-1x16x49x32xf32) <- (-1x49x3x16x32xf32) + transpose_132 = paddle._C_ops.transpose(reshape_301, [2, 0, 3, 1, 4]) + del reshape_301 + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_141 = paddle._C_ops.slice( + transpose_132, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_142 = paddle._C_ops.slice( + transpose_132, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_143 = paddle._C_ops.slice( + transpose_132, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_132 + + # pd_op.scale: (-1x16x49x32xf32) <- (-1x16x49x32xf32, 1xf32) + scale_20 = paddle._C_ops.scale(slice_141, full_8, float("0"), True) + del slice_141 + + # pd_op.transpose: (-1x16x32x49xf32) <- (-1x16x49x32xf32) + transpose_133 = paddle._C_ops.transpose(slice_142, [0, 1, 3, 2]) + del slice_142 + + # pd_op.matmul: (-1x16x49x49xf32) <- (-1x16x49x32xf32, -1x16x32x49xf32) + matmul_123 = paddle._C_ops.matmul(scale_20, transpose_133, False, False) + del scale_20, transpose_133 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_302 = paddle._C_ops.reshape(data_41, full_int_array_7) + del data_41 + + # pd_op.index_select: (2401x16xf32) <- (169x16xf32, 2401xi64) + index_select_20 = paddle._C_ops.index_select(data_42, reshape_302, 0) + del data_42, reshape_302 + + # pd_op.reshape: (49x49x16xf32) <- (2401x16xf32, 3xi64) + reshape_303 = paddle._C_ops.reshape(index_select_20, full_int_array_8) + del index_select_20 + + # pd_op.transpose: (16x49x49xf32) <- (49x49x16xf32) + transpose_134 = paddle._C_ops.transpose(reshape_303, [2, 0, 1]) + del reshape_303 + + # pd_op.unsqueeze: (1x16x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_60 = paddle._C_ops.unsqueeze(transpose_134, full_int_array_0) + del transpose_134 + + # pd_op.add: (-1x16x49x49xf32) <- (-1x16x49x49xf32, 1x16x49x49xf32) + add_153 = paddle._C_ops.add(matmul_123, unsqueeze_60) + del matmul_123, unsqueeze_60 + + # pd_op.softmax: (-1x16x49x49xf32) <- (-1x16x49x49xf32) + softmax_20 = paddle._C_ops.softmax(add_153, -1) + del add_153 + + # pd_op.matmul: (-1x16x49x32xf32) <- (-1x16x49x49xf32, -1x16x49x32xf32) + matmul_124 = paddle._C_ops.matmul(softmax_20, slice_143, False, False) + del slice_143, softmax_20 + + # pd_op.transpose: (-1x49x16x32xf32) <- (-1x16x49x32xf32) + transpose_135 = paddle._C_ops.transpose(matmul_124, [0, 2, 1, 3]) + del matmul_124 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_131 = [slice_140, full_4, full_18] + del slice_140 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_129 = paddle._C_ops.stack(combine_131, 0) + del combine_131 + + # pd_op.reshape: (-1x49x512xf32) <- (-1x49x16x32xf32, 3xi64) + reshape_304 = paddle._C_ops.reshape(transpose_135, stack_129) + del stack_129, transpose_135 + + # pd_op.matmul: (-1x49x512xf32) <- (-1x49x512xf32, 512x512xf32) + matmul_125 = paddle._C_ops.matmul(reshape_304, parameter_50, False, False) + del parameter_50, reshape_304 + + # pd_op.add: (-1x49x512xf32) <- (-1x49x512xf32, 512xf32) + add_154 = paddle._C_ops.add(matmul_125, parameter_49) + del matmul_125, parameter_49 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x49x512xf32, 4xi64) + reshape_305 = paddle._C_ops.reshape(add_154, full_int_array_38) + del add_154 + + # pd_op.reshape: (-1x2x2x7x7x512xf32) <- (-1x7x7x512xf32, 6xi64) + reshape_306 = paddle._C_ops.reshape(reshape_305, full_int_array_40) + del reshape_305 + + # pd_op.transpose: (-1x2x7x2x7x512xf32) <- (-1x2x2x7x7x512xf32) + transpose_136 = paddle._C_ops.transpose(reshape_306, [0, 1, 3, 2, 4, 5]) + del reshape_306 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x2x7x2x7x512xf32, 4xi64) + reshape_307 = paddle._C_ops.reshape(transpose_136, full_int_array_41) + del transpose_136 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_132 = [slice_138, full_30, full_18] + del slice_138 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_130 = paddle._C_ops.stack(combine_132, 0) + del combine_132 + + # pd_op.reshape: (-1x196x512xf32) <- (-1x14x14x512xf32, 3xi64) + reshape_308 = paddle._C_ops.reshape(reshape_307, stack_130) + del reshape_307, stack_130 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_155 = paddle._C_ops.add(add_151, reshape_308) + del add_151, reshape_308 + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_132, layer_norm_133, layer_norm_134 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_155, parameter_48, parameter_47, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_47, parameter_48 + + # pd_op.matmul: (-1x196x2048xf32) <- (-1x196x512xf32, 512x2048xf32) + matmul_126 = paddle._C_ops.matmul(layer_norm_132, parameter_46, False, False) + del layer_norm_132, parameter_46 + + # pd_op.add: (-1x196x2048xf32) <- (-1x196x2048xf32, 2048xf32) + add_156 = paddle._C_ops.add(matmul_126, parameter_45) + del matmul_126, parameter_45 + + # pd_op.gelu: (-1x196x2048xf32) <- (-1x196x2048xf32) + gelu_20 = paddle._C_ops.gelu(add_156, False) + del add_156 + + # pd_op.matmul: (-1x196x512xf32) <- (-1x196x2048xf32, 2048x512xf32) + matmul_127 = paddle._C_ops.matmul(gelu_20, parameter_44, False, False) + del gelu_20, parameter_44 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, 512xf32) + add_157 = paddle._C_ops.add(matmul_127, parameter_43) + del matmul_127, parameter_43 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_158 = paddle._C_ops.add(add_155, add_157) + del add_155, add_157 + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_81 = paddle._C_ops.shape64(add_158) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_144 = paddle._C_ops.slice( + shape64_81, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_81 + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_135, layer_norm_136, layer_norm_137 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_158, parameter_42, parameter_41, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_41, parameter_42 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_133 = [slice_144, full_28, full_28, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_131 = paddle._C_ops.stack(combine_133, 0) + del combine_133 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x196x512xf32, 4xi64) + reshape_309 = paddle._C_ops.reshape(layer_norm_135, stack_131) + del layer_norm_135, stack_131 + + # pd_op.shape64: (4xi64) <- (-1x14x14x512xf32) + shape64_82 = paddle._C_ops.shape64(reshape_309) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_145 = paddle._C_ops.slice( + shape64_82, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_82 + + # pd_op.roll: (-1x14x14x512xf32) <- (-1x14x14x512xf32, 2xi64) + roll_20 = paddle._C_ops.roll(reshape_309, full_int_array_11, [1, 2]) + del reshape_309 + + # pd_op.shape64: (4xi64) <- (-1x14x14x512xf32) + shape64_83 = paddle._C_ops.shape64(roll_20) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_146 = paddle._C_ops.slice( + shape64_83, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_83 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_134 = [slice_146, full_29, full_3, full_29, full_3, full_18] + del full_29, slice_146 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_132 = paddle._C_ops.stack(combine_134, 0) + del combine_134 + + # pd_op.reshape: (-1x2x7x2x7x512xf32) <- (-1x14x14x512xf32, 6xi64) + reshape_310 = paddle._C_ops.reshape(roll_20, stack_132) + del roll_20, stack_132 + + # pd_op.transpose: (-1x2x2x7x7x512xf32) <- (-1x2x7x2x7x512xf32) + transpose_137 = paddle._C_ops.transpose(reshape_310, [0, 1, 3, 2, 4, 5]) + del reshape_310 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x2x2x7x7x512xf32, 4xi64) + reshape_311 = paddle._C_ops.reshape(transpose_137, full_int_array_38) + del transpose_137 + + # pd_op.reshape: (-1x49x512xf32) <- (-1x7x7x512xf32, 3xi64) + reshape_312 = paddle._C_ops.reshape(reshape_311, full_int_array_39) + del full_int_array_39, reshape_311 + + # pd_op.full: (1x14x14x1xf32) <- () + full_42 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__92 = paddle._C_ops.set_value_( + full_42, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_42 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__93 = paddle._C_ops.set_value_( + set_value__92, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__92 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__94 = paddle._C_ops.set_value_( + set_value__93, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__93 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__95 = paddle._C_ops.set_value_( + set_value__94, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__94 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__96 = paddle._C_ops.set_value_( + set_value__95, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__95 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__97 = paddle._C_ops.set_value_( + set_value__96, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__96 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__98 = paddle._C_ops.set_value_( + set_value__97, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__97 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__99 = paddle._C_ops.set_value_( + set_value__98, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__98 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__10 = paddle._C_ops.set_value_( + set_value__99, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__99 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_313 = paddle._C_ops.reshape(set_value__10, full_int_array_42) + del full_int_array_42 + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_138 = paddle._C_ops.transpose(reshape_313, [0, 1, 3, 2, 4, 5]) + del reshape_313 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_314 = paddle._C_ops.reshape(transpose_138, full_int_array_27) + del transpose_138 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_315 = paddle._C_ops.reshape(reshape_314, full_int_array_28) + del reshape_314 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_61 = paddle._C_ops.unsqueeze(reshape_315, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_62 = paddle._C_ops.unsqueeze(reshape_315, full_int_array_5) + del reshape_315 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_10 = paddle._C_ops.subtract(unsqueeze_61, unsqueeze_62) + del unsqueeze_61, unsqueeze_62 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_10 = paddle._C_ops.not_equal(subtract_10, full_11) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_20 = paddle._C_ops.where(not_equal_10, full_32, subtract_10) + del full_32, not_equal_10, subtract_10 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_10 = paddle._C_ops.equal(where_20, full_11) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_21 = paddle._C_ops.where(equal_10, full_33, where_20) + del equal_10, full_33, where_20 + + # pd_op.shape64: (3xi64) <- (-1x49x512xf32) + shape64_84 = paddle._C_ops.shape64(reshape_312) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_147 = paddle._C_ops.slice( + shape64_84, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_84 + + # pd_op.matmul: (-1x49x1536xf32) <- (-1x49x512xf32, 512x1536xf32) + matmul_128 = paddle._C_ops.matmul(reshape_312, parameter_40, False, False) + del parameter_40, reshape_312 + + # pd_op.add: (-1x49x1536xf32) <- (-1x49x1536xf32, 1536xf32) + add_159 = paddle._C_ops.add(matmul_128, parameter_39) + del matmul_128, parameter_39 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_135 = [slice_147, full_4, full_5, full_26, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_133 = paddle._C_ops.stack(combine_135, 0) + del combine_135 + + # pd_op.reshape: (-1x49x3x16x32xf32) <- (-1x49x1536xf32, 5xi64) + reshape_316 = paddle._C_ops.reshape(add_159, stack_133) + del add_159, stack_133 + + # pd_op.transpose: (3x-1x16x49x32xf32) <- (-1x49x3x16x32xf32) + transpose_139 = paddle._C_ops.transpose(reshape_316, [2, 0, 3, 1, 4]) + del reshape_316 + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_148 = paddle._C_ops.slice( + transpose_139, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_149 = paddle._C_ops.slice( + transpose_139, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x16x49x32xf32) <- (3x-1x16x49x32xf32, 1xi64, 1xi64) + slice_150 = paddle._C_ops.slice( + transpose_139, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_139 + + # pd_op.scale: (-1x16x49x32xf32) <- (-1x16x49x32xf32, 1xf32) + scale_21 = paddle._C_ops.scale(slice_148, full_8, float("0"), True) + del slice_148 + + # pd_op.transpose: (-1x16x32x49xf32) <- (-1x16x49x32xf32) + transpose_140 = paddle._C_ops.transpose(slice_149, [0, 1, 3, 2]) + del slice_149 + + # pd_op.matmul: (-1x16x49x49xf32) <- (-1x16x49x32xf32, -1x16x32x49xf32) + matmul_129 = paddle._C_ops.matmul(scale_21, transpose_140, False, False) + del scale_21, transpose_140 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_317 = paddle._C_ops.reshape(data_43, full_int_array_7) + del data_43 + + # pd_op.index_select: (2401x16xf32) <- (169x16xf32, 2401xi64) + index_select_21 = paddle._C_ops.index_select(data_44, reshape_317, 0) + del data_44, reshape_317 + + # pd_op.reshape: (49x49x16xf32) <- (2401x16xf32, 3xi64) + reshape_318 = paddle._C_ops.reshape(index_select_21, full_int_array_8) + del index_select_21 + + # pd_op.transpose: (16x49x49xf32) <- (49x49x16xf32) + transpose_141 = paddle._C_ops.transpose(reshape_318, [2, 0, 1]) + del reshape_318 + + # pd_op.unsqueeze: (1x16x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_63 = paddle._C_ops.unsqueeze(transpose_141, full_int_array_0) + del transpose_141 + + # pd_op.add: (-1x16x49x49xf32) <- (-1x16x49x49xf32, 1x16x49x49xf32) + add_160 = paddle._C_ops.add(matmul_129, unsqueeze_63) + del matmul_129, unsqueeze_63 + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_10 = paddle._C_ops.floor_divide(slice_147, full_34) + del full_34 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_136 = [floor_divide_10, full_6, full_26, full_4, full_4] + del floor_divide_10, full_6 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_134 = paddle._C_ops.stack(combine_136, 0) + del combine_136 + + # pd_op.reshape: (-1x4x16x49x49xf32) <- (-1x16x49x49xf32, 5xi64) + reshape_319 = paddle._C_ops.reshape(add_160, stack_134) + del add_160, stack_134 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_64 = paddle._C_ops.unsqueeze(where_21, full_int_array_1) + del where_21 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_65 = paddle._C_ops.unsqueeze(unsqueeze_64, full_int_array_0) + del unsqueeze_64 + + # pd_op.add: (-1x4x16x49x49xf32) <- (-1x4x16x49x49xf32, 1x4x1x49x49xf32) + add_161 = paddle._C_ops.add(reshape_319, unsqueeze_65) + del reshape_319, unsqueeze_65 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_137 = [slice_147, full_26, full_4, full_4] + del full_26 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_135 = paddle._C_ops.stack(combine_137, 0) + del combine_137 + + # pd_op.reshape: (-1x16x49x49xf32) <- (-1x4x16x49x49xf32, 4xi64) + reshape_320 = paddle._C_ops.reshape(add_161, stack_135) + del add_161, stack_135 + + # pd_op.softmax: (-1x16x49x49xf32) <- (-1x16x49x49xf32) + softmax_21 = paddle._C_ops.softmax(reshape_320, -1) + del reshape_320 + + # pd_op.matmul: (-1x16x49x32xf32) <- (-1x16x49x49xf32, -1x16x49x32xf32) + matmul_130 = paddle._C_ops.matmul(softmax_21, slice_150, False, False) + del slice_150, softmax_21 + + # pd_op.transpose: (-1x49x16x32xf32) <- (-1x16x49x32xf32) + transpose_142 = paddle._C_ops.transpose(matmul_130, [0, 2, 1, 3]) + del matmul_130 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_138 = [slice_147, full_4, full_18] + del slice_147 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_136 = paddle._C_ops.stack(combine_138, 0) + del combine_138 + + # pd_op.reshape: (-1x49x512xf32) <- (-1x49x16x32xf32, 3xi64) + reshape_321 = paddle._C_ops.reshape(transpose_142, stack_136) + del stack_136, transpose_142 + + # pd_op.matmul: (-1x49x512xf32) <- (-1x49x512xf32, 512x512xf32) + matmul_131 = paddle._C_ops.matmul(reshape_321, parameter_38, False, False) + del parameter_38, reshape_321 + + # pd_op.add: (-1x49x512xf32) <- (-1x49x512xf32, 512xf32) + add_162 = paddle._C_ops.add(matmul_131, parameter_37) + del matmul_131, parameter_37 + + # pd_op.reshape: (-1x7x7x512xf32) <- (-1x49x512xf32, 4xi64) + reshape_322 = paddle._C_ops.reshape(add_162, full_int_array_38) + del add_162, full_int_array_38 + + # pd_op.reshape: (-1x2x2x7x7x512xf32) <- (-1x7x7x512xf32, 6xi64) + reshape_323 = paddle._C_ops.reshape(reshape_322, full_int_array_40) + del full_int_array_40, reshape_322 + + # pd_op.transpose: (-1x2x7x2x7x512xf32) <- (-1x2x2x7x7x512xf32) + transpose_143 = paddle._C_ops.transpose(reshape_323, [0, 1, 3, 2, 4, 5]) + del reshape_323 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x2x7x2x7x512xf32, 4xi64) + reshape_324 = paddle._C_ops.reshape(transpose_143, full_int_array_41) + del full_int_array_41, transpose_143 + + # pd_op.roll: (-1x14x14x512xf32) <- (-1x14x14x512xf32, 2xi64) + roll_21 = paddle._C_ops.roll(reshape_324, full_int_array_29, [1, 2]) + del reshape_324 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_139 = [slice_144, full_30, full_18] + del full_30, slice_144 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_137 = paddle._C_ops.stack(combine_139, 0) + del combine_139 + + # pd_op.reshape: (-1x196x512xf32) <- (-1x14x14x512xf32, 3xi64) + reshape_325 = paddle._C_ops.reshape(roll_21, stack_137) + del roll_21, stack_137 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_163 = paddle._C_ops.add(add_158, reshape_325) + del add_158, reshape_325 + + # pd_op.layer_norm: (-1x196x512xf32, -1x196xf32, -1x196xf32) <- (-1x196x512xf32, 512xf32, 512xf32) + layer_norm_138, layer_norm_139, layer_norm_140 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_163, parameter_36, parameter_35, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_35, parameter_36 + + # pd_op.matmul: (-1x196x2048xf32) <- (-1x196x512xf32, 512x2048xf32) + matmul_132 = paddle._C_ops.matmul(layer_norm_138, parameter_34, False, False) + del layer_norm_138, parameter_34 + + # pd_op.add: (-1x196x2048xf32) <- (-1x196x2048xf32, 2048xf32) + add_164 = paddle._C_ops.add(matmul_132, parameter_33) + del matmul_132, parameter_33 + + # pd_op.gelu: (-1x196x2048xf32) <- (-1x196x2048xf32) + gelu_21 = paddle._C_ops.gelu(add_164, False) + del add_164 + + # pd_op.matmul: (-1x196x512xf32) <- (-1x196x2048xf32, 2048x512xf32) + matmul_133 = paddle._C_ops.matmul(gelu_21, parameter_32, False, False) + del gelu_21, parameter_32 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, 512xf32) + add_165 = paddle._C_ops.add(matmul_133, parameter_31) + del matmul_133, parameter_31 + + # pd_op.add: (-1x196x512xf32) <- (-1x196x512xf32, -1x196x512xf32) + add_166 = paddle._C_ops.add(add_163, add_165) + del add_163, add_165 + + # pd_op.shape64: (3xi64) <- (-1x196x512xf32) + shape64_85 = paddle._C_ops.shape64(add_166) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_151 = paddle._C_ops.slice( + shape64_85, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_85 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_140 = [slice_151, full_28, full_28, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_138 = paddle._C_ops.stack(combine_140, 0) + del combine_140 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x196x512xf32, 4xi64) + reshape_326 = paddle._C_ops.reshape(add_166, stack_138) + del add_166, stack_138 + + # pd_op.strided_slice: (-1x7x7x512xf32) <- (-1x14x14x512xf32, 2xi64, 2xi64, 2xi64) + strided_slice_8 = paddle._C_ops.strided_slice( + reshape_326, [1, 2], full_int_array_12, full_int_array_25, full_int_array_30 + ) + + # pd_op.strided_slice: (-1x7x7x512xf32) <- (-1x14x14x512xf32, 2xi64, 2xi64, 2xi64) + strided_slice_9 = paddle._C_ops.strided_slice( + reshape_326, [1, 2], full_int_array_31, full_int_array_25, full_int_array_30 + ) + del full_int_array_31 + + # pd_op.strided_slice: (-1x7x7x512xf32) <- (-1x14x14x512xf32, 2xi64, 2xi64, 2xi64) + strided_slice_10 = paddle._C_ops.strided_slice( + reshape_326, [1, 2], full_int_array_32, full_int_array_25, full_int_array_30 + ) + del full_int_array_32 + + # pd_op.strided_slice: (-1x7x7x512xf32) <- (-1x14x14x512xf32, 2xi64, 2xi64, 2xi64) + strided_slice_11 = paddle._C_ops.strided_slice( + reshape_326, [1, 2], full_int_array_14, full_int_array_25, full_int_array_30 + ) + del full_int_array_30 + + # pd_op.shape64: (4xi64) <- (-1x14x14x512xf32) + shape64_86 = paddle._C_ops.shape64(reshape_326) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_152 = paddle._C_ops.slice( + shape64_86, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_86 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_141 = [slice_152, full_28, full_28, full_18] + del full_18, full_28, slice_152 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_139 = paddle._C_ops.stack(combine_141, 0) + del combine_141 + + # pd_op.reshape: (-1x14x14x512xf32) <- (-1x14x14x512xf32, 4xi64) + reshape_327 = paddle._C_ops.reshape(reshape_326, stack_139) + del reshape_326, stack_139 + + # builtin.combine: ([-1x7x7x512xf32, -1x7x7x512xf32, -1x7x7x512xf32, -1x7x7x512xf32]) <- (-1x7x7x512xf32, -1x7x7x512xf32, -1x7x7x512xf32, -1x7x7x512xf32) + combine_142 = [ + strided_slice_8, + strided_slice_9, + strided_slice_10, + strided_slice_11, + ] + del strided_slice_10, strided_slice_11, strided_slice_8, strided_slice_9 + + # pd_op.concat: (-1x7x7x2048xf32) <- ([-1x7x7x512xf32, -1x7x7x512xf32, -1x7x7x512xf32, -1x7x7x512xf32], 1xi32) + concat_2 = paddle._C_ops.concat(combine_142, full_16) + del combine_142, full_16 + + # pd_op.full: (xi64) <- () + full_43 = paddle._C_ops.full( + [], float("2048"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_143 = [slice_151, full_17, full_43] + del full_17, full_43, slice_151 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_140 = paddle._C_ops.stack(combine_143, 0) + del combine_143 + + # pd_op.reshape: (-1x-1x2048xf32) <- (-1x7x7x2048xf32, 3xi64) + reshape_328 = paddle._C_ops.reshape(concat_2, stack_140) + del concat_2, stack_140 + + # pd_op.layer_norm: (-1x-1x2048xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x2048xf32, 2048xf32, 2048xf32) + layer_norm_141, layer_norm_142, layer_norm_143 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + reshape_328, parameter_30, parameter_29, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_29, parameter_30, reshape_328 + + # pd_op.matmul: (-1x-1x1024xf32) <- (-1x-1x2048xf32, 2048x1024xf32) + matmul_134 = paddle._C_ops.matmul(layer_norm_141, parameter_28, False, False) + del layer_norm_141, parameter_28 + + # pd_op.shape64: (3xi64) <- (-1x-1x1024xf32) + shape64_87 = paddle._C_ops.shape64(matmul_134) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_153 = paddle._C_ops.slice( + shape64_87, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_87 + + # pd_op.shape64: (3xi64) <- (-1x-1x1024xf32) + shape64_88 = paddle._C_ops.shape64(matmul_134) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_154 = paddle._C_ops.slice( + shape64_88, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + del shape64_88 + + # pd_op.layer_norm: (-1x-1x1024xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x1024xf32, 1024xf32, 1024xf32) + layer_norm_144, layer_norm_145, layer_norm_146 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + matmul_134, parameter_27, parameter_26, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_26, parameter_27 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_144 = [slice_153, full_3, full_3, full_27] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_141 = paddle._C_ops.stack(combine_144, 0) + del combine_144 + + # pd_op.reshape: (-1x7x7x1024xf32) <- (-1x-1x1024xf32, 4xi64) + reshape_329 = paddle._C_ops.reshape(layer_norm_144, stack_141) + del layer_norm_144, stack_141 + + # pd_op.shape64: (4xi64) <- (-1x7x7x1024xf32) + shape64_89 = paddle._C_ops.shape64(reshape_329) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_155 = paddle._C_ops.slice( + shape64_89, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_89 + + # pd_op.full: (xi64) <- () + full_44 = paddle._C_ops.full( + [], float("1"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_145 = [slice_155, full_44, full_3, full_44, full_3, full_27] + del slice_155 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_142 = paddle._C_ops.stack(combine_145, 0) + del combine_145 + + # pd_op.reshape: (-1x1x7x1x7x1024xf32) <- (-1x7x7x1024xf32, 6xi64) + reshape_330 = paddle._C_ops.reshape(reshape_329, stack_142) + del reshape_329, stack_142 + + # pd_op.transpose: (-1x1x1x7x7x1024xf32) <- (-1x1x7x1x7x1024xf32) + transpose_144 = paddle._C_ops.transpose(reshape_330, [0, 1, 3, 2, 4, 5]) + del reshape_330 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_43 = [-1, 7, 7, 1024] + + # pd_op.reshape: (-1x7x7x1024xf32) <- (-1x1x1x7x7x1024xf32, 4xi64) + reshape_331 = paddle._C_ops.reshape(transpose_144, full_int_array_43) + del transpose_144 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_44 = [-1, 49, 1024] + + # pd_op.reshape: (-1x49x1024xf32) <- (-1x7x7x1024xf32, 3xi64) + reshape_332 = paddle._C_ops.reshape(reshape_331, full_int_array_44) + del reshape_331 + + # pd_op.shape64: (3xi64) <- (-1x49x1024xf32) + shape64_90 = paddle._C_ops.shape64(reshape_332) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_156 = paddle._C_ops.slice( + shape64_90, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_90 + + # pd_op.matmul: (-1x49x3072xf32) <- (-1x49x1024xf32, 1024x3072xf32) + matmul_135 = paddle._C_ops.matmul(reshape_332, parameter_25, False, False) + del parameter_25, reshape_332 + + # pd_op.add: (-1x49x3072xf32) <- (-1x49x3072xf32, 3072xf32) + add_167 = paddle._C_ops.add(matmul_135, parameter_24) + del matmul_135, parameter_24 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_146 = [slice_156, full_4, full_5, full_7, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_143 = paddle._C_ops.stack(combine_146, 0) + del combine_146 + + # pd_op.reshape: (-1x49x3x32x32xf32) <- (-1x49x3072xf32, 5xi64) + reshape_333 = paddle._C_ops.reshape(add_167, stack_143) + del add_167, stack_143 + + # pd_op.transpose: (3x-1x32x49x32xf32) <- (-1x49x3x32x32xf32) + transpose_145 = paddle._C_ops.transpose(reshape_333, [2, 0, 3, 1, 4]) + del reshape_333 + + # pd_op.slice: (-1x32x49x32xf32) <- (3x-1x32x49x32xf32, 1xi64, 1xi64) + slice_157 = paddle._C_ops.slice( + transpose_145, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x32x49x32xf32) <- (3x-1x32x49x32xf32, 1xi64, 1xi64) + slice_158 = paddle._C_ops.slice( + transpose_145, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x32x49x32xf32) <- (3x-1x32x49x32xf32, 1xi64, 1xi64) + slice_159 = paddle._C_ops.slice( + transpose_145, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_145 + + # pd_op.scale: (-1x32x49x32xf32) <- (-1x32x49x32xf32, 1xf32) + scale_22 = paddle._C_ops.scale(slice_157, full_8, float("0"), True) + del slice_157 + + # pd_op.transpose: (-1x32x32x49xf32) <- (-1x32x49x32xf32) + transpose_146 = paddle._C_ops.transpose(slice_158, [0, 1, 3, 2]) + del slice_158 + + # pd_op.matmul: (-1x32x49x49xf32) <- (-1x32x49x32xf32, -1x32x32x49xf32) + matmul_136 = paddle._C_ops.matmul(scale_22, transpose_146, False, False) + del scale_22, transpose_146 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_334 = paddle._C_ops.reshape(data_45, full_int_array_7) + del data_45 + + # pd_op.index_select: (2401x32xf32) <- (169x32xf32, 2401xi64) + index_select_22 = paddle._C_ops.index_select(data_46, reshape_334, 0) + del data_46, reshape_334 + + # pd_op.reshape: (49x49x32xf32) <- (2401x32xf32, 3xi64) + reshape_335 = paddle._C_ops.reshape(index_select_22, full_int_array_8) + del index_select_22 + + # pd_op.transpose: (32x49x49xf32) <- (49x49x32xf32) + transpose_147 = paddle._C_ops.transpose(reshape_335, [2, 0, 1]) + del reshape_335 + + # pd_op.unsqueeze: (1x32x49x49xf32) <- (32x49x49xf32, 1xi64) + unsqueeze_66 = paddle._C_ops.unsqueeze(transpose_147, full_int_array_0) + del transpose_147 + + # pd_op.add: (-1x32x49x49xf32) <- (-1x32x49x49xf32, 1x32x49x49xf32) + add_168 = paddle._C_ops.add(matmul_136, unsqueeze_66) + del matmul_136, unsqueeze_66 + + # pd_op.softmax: (-1x32x49x49xf32) <- (-1x32x49x49xf32) + softmax_22 = paddle._C_ops.softmax(add_168, -1) + del add_168 + + # pd_op.matmul: (-1x32x49x32xf32) <- (-1x32x49x49xf32, -1x32x49x32xf32) + matmul_137 = paddle._C_ops.matmul(softmax_22, slice_159, False, False) + del slice_159, softmax_22 + + # pd_op.transpose: (-1x49x32x32xf32) <- (-1x32x49x32xf32) + transpose_148 = paddle._C_ops.transpose(matmul_137, [0, 2, 1, 3]) + del matmul_137 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_147 = [slice_156, full_4, full_27] + del slice_156 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_144 = paddle._C_ops.stack(combine_147, 0) + del combine_147 + + # pd_op.reshape: (-1x49x1024xf32) <- (-1x49x32x32xf32, 3xi64) + reshape_336 = paddle._C_ops.reshape(transpose_148, stack_144) + del stack_144, transpose_148 + + # pd_op.matmul: (-1x49x1024xf32) <- (-1x49x1024xf32, 1024x1024xf32) + matmul_138 = paddle._C_ops.matmul(reshape_336, parameter_23, False, False) + del parameter_23, reshape_336 + + # pd_op.add: (-1x49x1024xf32) <- (-1x49x1024xf32, 1024xf32) + add_169 = paddle._C_ops.add(matmul_138, parameter_22) + del matmul_138, parameter_22 + + # pd_op.reshape: (-1x7x7x1024xf32) <- (-1x49x1024xf32, 4xi64) + reshape_337 = paddle._C_ops.reshape(add_169, full_int_array_43) + del add_169 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_45 = [-1, 1, 1, 7, 7, 1024] + + # pd_op.reshape: (-1x1x1x7x7x1024xf32) <- (-1x7x7x1024xf32, 6xi64) + reshape_338 = paddle._C_ops.reshape(reshape_337, full_int_array_45) + del reshape_337 + + # pd_op.transpose: (-1x1x7x1x7x1024xf32) <- (-1x1x1x7x7x1024xf32) + transpose_149 = paddle._C_ops.transpose(reshape_338, [0, 1, 3, 2, 4, 5]) + del reshape_338 + + # pd_op.reshape: (-1x7x7x1024xf32) <- (-1x1x7x1x7x1024xf32, 4xi64) + reshape_339 = paddle._C_ops.reshape(transpose_149, full_int_array_43) + del transpose_149 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_148 = [slice_153, full_4, full_27] + del slice_153 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_145 = paddle._C_ops.stack(combine_148, 0) + del combine_148 + + # pd_op.reshape: (-1x49x1024xf32) <- (-1x7x7x1024xf32, 3xi64) + reshape_340 = paddle._C_ops.reshape(reshape_339, stack_145) + del reshape_339, stack_145 + + # pd_op.add: (-1x49x1024xf32) <- (-1x-1x1024xf32, -1x49x1024xf32) + add_170 = paddle._C_ops.add(matmul_134, reshape_340) + del matmul_134, reshape_340 + + # pd_op.layer_norm: (-1x49x1024xf32, -1x49xf32, -1x49xf32) <- (-1x49x1024xf32, 1024xf32, 1024xf32) + layer_norm_147, layer_norm_148, layer_norm_149 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_170, parameter_21, parameter_20, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_20, parameter_21 + + # pd_op.matmul: (-1x49x4096xf32) <- (-1x49x1024xf32, 1024x4096xf32) + matmul_139 = paddle._C_ops.matmul(layer_norm_147, parameter_19, False, False) + del layer_norm_147, parameter_19 + + # pd_op.add: (-1x49x4096xf32) <- (-1x49x4096xf32, 4096xf32) + add_171 = paddle._C_ops.add(matmul_139, parameter_18) + del matmul_139, parameter_18 + + # pd_op.gelu: (-1x49x4096xf32) <- (-1x49x4096xf32) + gelu_22 = paddle._C_ops.gelu(add_171, False) + del add_171 + + # pd_op.matmul: (-1x49x1024xf32) <- (-1x49x4096xf32, 4096x1024xf32) + matmul_140 = paddle._C_ops.matmul(gelu_22, parameter_17, False, False) + del gelu_22, parameter_17 + + # pd_op.add: (-1x49x1024xf32) <- (-1x49x1024xf32, 1024xf32) + add_172 = paddle._C_ops.add(matmul_140, parameter_16) + del matmul_140, parameter_16 + + # pd_op.add: (-1x49x1024xf32) <- (-1x49x1024xf32, -1x49x1024xf32) + add_173 = paddle._C_ops.add(add_170, add_172) + del add_170, add_172 + + # pd_op.shape64: (3xi64) <- (-1x49x1024xf32) + shape64_91 = paddle._C_ops.shape64(add_173) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_160 = paddle._C_ops.slice( + shape64_91, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_91 + + # pd_op.layer_norm: (-1x49x1024xf32, -1x49xf32, -1x49xf32) <- (-1x49x1024xf32, 1024xf32, 1024xf32) + layer_norm_150, layer_norm_151, layer_norm_152 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_173, parameter_15, parameter_14, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_14, parameter_15 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_149 = [slice_160, full_3, full_3, full_27] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_146 = paddle._C_ops.stack(combine_149, 0) + del combine_149 + + # pd_op.reshape: (-1x7x7x1024xf32) <- (-1x49x1024xf32, 4xi64) + reshape_341 = paddle._C_ops.reshape(layer_norm_150, stack_146) + del layer_norm_150, stack_146 + + # pd_op.shape64: (4xi64) <- (-1x7x7x1024xf32) + shape64_92 = paddle._C_ops.shape64(reshape_341) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_161 = paddle._C_ops.slice( + shape64_92, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_92 + + # pd_op.roll: (-1x7x7x1024xf32) <- (-1x7x7x1024xf32, 2xi64) + roll_22 = paddle._C_ops.roll(reshape_341, full_int_array_11, [1, 2]) + del reshape_341 + + # pd_op.shape64: (4xi64) <- (-1x7x7x1024xf32) + shape64_93 = paddle._C_ops.shape64(roll_22) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_162 = paddle._C_ops.slice( + shape64_93, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_93 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_150 = [slice_162, full_44, full_3, full_44, full_3, full_27] + del full_3, slice_162 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_147 = paddle._C_ops.stack(combine_150, 0) + del combine_150 + + # pd_op.reshape: (-1x1x7x1x7x1024xf32) <- (-1x7x7x1024xf32, 6xi64) + reshape_342 = paddle._C_ops.reshape(roll_22, stack_147) + del roll_22, stack_147 + + # pd_op.transpose: (-1x1x1x7x7x1024xf32) <- (-1x1x7x1x7x1024xf32) + transpose_150 = paddle._C_ops.transpose(reshape_342, [0, 1, 3, 2, 4, 5]) + del reshape_342 + + # pd_op.reshape: (-1x7x7x1024xf32) <- (-1x1x1x7x7x1024xf32, 4xi64) + reshape_343 = paddle._C_ops.reshape(transpose_150, full_int_array_43) + del transpose_150 + + # pd_op.reshape: (-1x49x1024xf32) <- (-1x7x7x1024xf32, 3xi64) + reshape_344 = paddle._C_ops.reshape(reshape_343, full_int_array_44) + del full_int_array_44, reshape_343 + + # pd_op.full: (1x7x7x1xf32) <- () + full_45 = paddle._C_ops.full( + [1, 7, 7, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__100 = paddle._C_ops.set_value_( + full_45, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_45, full_int_array_12 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__101 = paddle._C_ops.set_value_( + set_value__100, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del full_int_array_15, set_value__100 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__102 = paddle._C_ops.set_value_( + set_value__101, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del full_int_array_17, full_int_array_18, set_value__101 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__103 = paddle._C_ops.set_value_( + set_value__102, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del full_int_array_19, set_value__102 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__104 = paddle._C_ops.set_value_( + set_value__103, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del full_int_array_13, set_value__103 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__105 = paddle._C_ops.set_value_( + set_value__104, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del full_int_array_16, full_int_array_21, set_value__104 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__106 = paddle._C_ops.set_value_( + set_value__105, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del full_int_array_22, full_int_array_23, set_value__105 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__107 = paddle._C_ops.set_value_( + set_value__106, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del full_int_array_20, full_int_array_24, set_value__106 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__11 = paddle._C_ops.set_value_( + set_value__107, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del full_int_array_11, full_int_array_25, set_value__107 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_46 = [1, 1, 7, 1, 7, 1] + + # pd_op.reshape: (1x1x7x1x7x1xf32) <- (1x7x7x1xf32, 6xi64) + reshape_345 = paddle._C_ops.reshape(set_value__11, full_int_array_46) + del full_int_array_46 + + # pd_op.transpose: (1x1x1x7x7x1xf32) <- (1x1x7x1x7x1xf32) + transpose_151 = paddle._C_ops.transpose(reshape_345, [0, 1, 3, 2, 4, 5]) + del reshape_345 + + # pd_op.reshape: (1x7x7x1xf32) <- (1x1x1x7x7x1xf32, 4xi64) + reshape_346 = paddle._C_ops.reshape(transpose_151, full_int_array_27) + del full_int_array_27, transpose_151 + + # pd_op.reshape: (1x49xf32) <- (1x7x7x1xf32, 2xi64) + reshape_347 = paddle._C_ops.reshape(reshape_346, full_int_array_28) + del full_int_array_28, reshape_346 + + # pd_op.unsqueeze: (1x1x49xf32) <- (1x49xf32, 1xi64) + unsqueeze_67 = paddle._C_ops.unsqueeze(reshape_347, full_int_array_1) + + # pd_op.unsqueeze: (1x49x1xf32) <- (1x49xf32, 1xi64) + unsqueeze_68 = paddle._C_ops.unsqueeze(reshape_347, full_int_array_5) + del reshape_347 + + # pd_op.subtract: (1x49x49xf32) <- (1x1x49xf32, 1x49x1xf32) + subtract_11 = paddle._C_ops.subtract(unsqueeze_67, unsqueeze_68) + del unsqueeze_67, unsqueeze_68 + + # pd_op.not_equal: (1x49x49xb) <- (1x49x49xf32, xf32) + not_equal_11 = paddle._C_ops.not_equal(subtract_11, full_11) + + # pd_op.full: (1x49x49xf32) <- () + full_46 = paddle._C_ops.full( + [1, 49, 49], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (1x49x49xf32) <- (1x49x49xb, 1x49x49xf32, 1x49x49xf32) + where_22 = paddle._C_ops.where(not_equal_11, full_46, subtract_11) + del full_46, not_equal_11, subtract_11 + + # pd_op.equal: (1x49x49xb) <- (1x49x49xf32, xf32) + equal_11 = paddle._C_ops.equal(where_22, full_11) + del full_11 + + # pd_op.full: (1x49x49xf32) <- () + full_47 = paddle._C_ops.full( + [1, 49, 49], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (1x49x49xf32) <- (1x49x49xb, 1x49x49xf32, 1x49x49xf32) + where_23 = paddle._C_ops.where(equal_11, full_47, where_22) + del equal_11, full_47, where_22 + + # pd_op.shape64: (3xi64) <- (-1x49x1024xf32) + shape64_94 = paddle._C_ops.shape64(reshape_344) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_163 = paddle._C_ops.slice( + shape64_94, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_94 + + # pd_op.matmul: (-1x49x3072xf32) <- (-1x49x1024xf32, 1024x3072xf32) + matmul_141 = paddle._C_ops.matmul(reshape_344, parameter_13, False, False) + del parameter_13, reshape_344 + + # pd_op.add: (-1x49x3072xf32) <- (-1x49x3072xf32, 3072xf32) + add_174 = paddle._C_ops.add(matmul_141, parameter_12) + del matmul_141, parameter_12 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_151 = [slice_163, full_4, full_5, full_7, full_7] + del full_5 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_148 = paddle._C_ops.stack(combine_151, 0) + del combine_151 + + # pd_op.reshape: (-1x49x3x32x32xf32) <- (-1x49x3072xf32, 5xi64) + reshape_348 = paddle._C_ops.reshape(add_174, stack_148) + del add_174, stack_148 + + # pd_op.transpose: (3x-1x32x49x32xf32) <- (-1x49x3x32x32xf32) + transpose_152 = paddle._C_ops.transpose(reshape_348, [2, 0, 3, 1, 4]) + del reshape_348 + + # pd_op.slice: (-1x32x49x32xf32) <- (3x-1x32x49x32xf32, 1xi64, 1xi64) + slice_164 = paddle._C_ops.slice( + transpose_152, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x32x49x32xf32) <- (3x-1x32x49x32xf32, 1xi64, 1xi64) + slice_165 = paddle._C_ops.slice( + transpose_152, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x32x49x32xf32) <- (3x-1x32x49x32xf32, 1xi64, 1xi64) + slice_166 = paddle._C_ops.slice( + transpose_152, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del full_int_array_6, transpose_152 + + # pd_op.scale: (-1x32x49x32xf32) <- (-1x32x49x32xf32, 1xf32) + scale_23 = paddle._C_ops.scale(slice_164, full_8, float("0"), True) + del full_8, slice_164 + + # pd_op.transpose: (-1x32x32x49xf32) <- (-1x32x49x32xf32) + transpose_153 = paddle._C_ops.transpose(slice_165, [0, 1, 3, 2]) + del slice_165 + + # pd_op.matmul: (-1x32x49x49xf32) <- (-1x32x49x32xf32, -1x32x32x49xf32) + matmul_142 = paddle._C_ops.matmul(scale_23, transpose_153, False, False) + del scale_23, transpose_153 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_349 = paddle._C_ops.reshape(data_47, full_int_array_7) + del data_47, full_int_array_7 + + # pd_op.index_select: (2401x32xf32) <- (169x32xf32, 2401xi64) + index_select_23 = paddle._C_ops.index_select(data_48, reshape_349, 0) + del data_48, reshape_349 + + # pd_op.reshape: (49x49x32xf32) <- (2401x32xf32, 3xi64) + reshape_350 = paddle._C_ops.reshape(index_select_23, full_int_array_8) + del full_int_array_8, index_select_23 + + # pd_op.transpose: (32x49x49xf32) <- (49x49x32xf32) + transpose_154 = paddle._C_ops.transpose(reshape_350, [2, 0, 1]) + del reshape_350 + + # pd_op.unsqueeze: (1x32x49x49xf32) <- (32x49x49xf32, 1xi64) + unsqueeze_69 = paddle._C_ops.unsqueeze(transpose_154, full_int_array_0) + del transpose_154 + + # pd_op.add: (-1x32x49x49xf32) <- (-1x32x49x49xf32, 1x32x49x49xf32) + add_175 = paddle._C_ops.add(matmul_142, unsqueeze_69) + del matmul_142, unsqueeze_69 + + # pd_op.full: (xi64) <- () + full_48 = paddle._C_ops.full( + [], float("1"), paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_11 = paddle._C_ops.floor_divide(slice_163, full_48) + del full_48 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_152 = [floor_divide_11, full_44, full_7, full_4, full_4] + del floor_divide_11, full_44 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_149 = paddle._C_ops.stack(combine_152, 0) + del combine_152 + + # pd_op.reshape: (-1x1x32x49x49xf32) <- (-1x32x49x49xf32, 5xi64) + reshape_351 = paddle._C_ops.reshape(add_175, stack_149) + del add_175, stack_149 + + # pd_op.unsqueeze: (1x1x49x49xf32) <- (1x49x49xf32, 1xi64) + unsqueeze_70 = paddle._C_ops.unsqueeze(where_23, full_int_array_1) + del full_int_array_1, where_23 + + # pd_op.unsqueeze: (1x1x1x49x49xf32) <- (1x1x49x49xf32, 1xi64) + unsqueeze_71 = paddle._C_ops.unsqueeze(unsqueeze_70, full_int_array_0) + del full_int_array_0, unsqueeze_70 + + # pd_op.add: (-1x1x32x49x49xf32) <- (-1x1x32x49x49xf32, 1x1x1x49x49xf32) + add_176 = paddle._C_ops.add(reshape_351, unsqueeze_71) + del reshape_351, unsqueeze_71 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_153 = [slice_163, full_7, full_4, full_4] + del full_7 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_150 = paddle._C_ops.stack(combine_153, 0) + del combine_153 + + # pd_op.reshape: (-1x32x49x49xf32) <- (-1x1x32x49x49xf32, 4xi64) + reshape_352 = paddle._C_ops.reshape(add_176, stack_150) + del add_176, stack_150 + + # pd_op.softmax: (-1x32x49x49xf32) <- (-1x32x49x49xf32) + softmax_23 = paddle._C_ops.softmax(reshape_352, -1) + del reshape_352 + + # pd_op.matmul: (-1x32x49x32xf32) <- (-1x32x49x49xf32, -1x32x49x32xf32) + matmul_143 = paddle._C_ops.matmul(softmax_23, slice_166, False, False) + del slice_166, softmax_23 + + # pd_op.transpose: (-1x49x32x32xf32) <- (-1x32x49x32xf32) + transpose_155 = paddle._C_ops.transpose(matmul_143, [0, 2, 1, 3]) + del matmul_143 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_154 = [slice_163, full_4, full_27] + del slice_163 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_151 = paddle._C_ops.stack(combine_154, 0) + del combine_154 + + # pd_op.reshape: (-1x49x1024xf32) <- (-1x49x32x32xf32, 3xi64) + reshape_353 = paddle._C_ops.reshape(transpose_155, stack_151) + del stack_151, transpose_155 + + # pd_op.matmul: (-1x49x1024xf32) <- (-1x49x1024xf32, 1024x1024xf32) + matmul_144 = paddle._C_ops.matmul(reshape_353, parameter_11, False, False) + del parameter_11, reshape_353 + + # pd_op.add: (-1x49x1024xf32) <- (-1x49x1024xf32, 1024xf32) + add_177 = paddle._C_ops.add(matmul_144, parameter_10) + del matmul_144, parameter_10 + + # pd_op.reshape: (-1x7x7x1024xf32) <- (-1x49x1024xf32, 4xi64) + reshape_354 = paddle._C_ops.reshape(add_177, full_int_array_43) + del add_177 + + # pd_op.reshape: (-1x1x1x7x7x1024xf32) <- (-1x7x7x1024xf32, 6xi64) + reshape_355 = paddle._C_ops.reshape(reshape_354, full_int_array_45) + del full_int_array_45, reshape_354 + + # pd_op.transpose: (-1x1x7x1x7x1024xf32) <- (-1x1x1x7x7x1024xf32) + transpose_156 = paddle._C_ops.transpose(reshape_355, [0, 1, 3, 2, 4, 5]) + del reshape_355 + + # pd_op.reshape: (-1x7x7x1024xf32) <- (-1x1x7x1x7x1024xf32, 4xi64) + reshape_356 = paddle._C_ops.reshape(transpose_156, full_int_array_43) + del full_int_array_43, transpose_156 + + # pd_op.roll: (-1x7x7x1024xf32) <- (-1x7x7x1024xf32, 2xi64) + roll_23 = paddle._C_ops.roll(reshape_356, full_int_array_29, [1, 2]) + del full_int_array_29, reshape_356 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_155 = [slice_160, full_4, full_27] + del full_27, full_4, slice_160 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_152 = paddle._C_ops.stack(combine_155, 0) + del combine_155 + + # pd_op.reshape: (-1x49x1024xf32) <- (-1x7x7x1024xf32, 3xi64) + reshape_357 = paddle._C_ops.reshape(roll_23, stack_152) + del roll_23, stack_152 + + # pd_op.add: (-1x49x1024xf32) <- (-1x49x1024xf32, -1x49x1024xf32) + add_178 = paddle._C_ops.add(add_173, reshape_357) + del add_173, reshape_357 + + # pd_op.layer_norm: (-1x49x1024xf32, -1x49xf32, -1x49xf32) <- (-1x49x1024xf32, 1024xf32, 1024xf32) + layer_norm_153, layer_norm_154, layer_norm_155 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_178, parameter_9, parameter_8, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_8, parameter_9 + + # pd_op.matmul: (-1x49x4096xf32) <- (-1x49x1024xf32, 1024x4096xf32) + matmul_145 = paddle._C_ops.matmul(layer_norm_153, parameter_7, False, False) + del layer_norm_153, parameter_7 + + # pd_op.add: (-1x49x4096xf32) <- (-1x49x4096xf32, 4096xf32) + add_179 = paddle._C_ops.add(matmul_145, parameter_6) + del matmul_145, parameter_6 + + # pd_op.gelu: (-1x49x4096xf32) <- (-1x49x4096xf32) + gelu_23 = paddle._C_ops.gelu(add_179, False) + del add_179 + + # pd_op.matmul: (-1x49x1024xf32) <- (-1x49x4096xf32, 4096x1024xf32) + matmul_146 = paddle._C_ops.matmul(gelu_23, parameter_5, False, False) + del gelu_23, parameter_5 + + # pd_op.add: (-1x49x1024xf32) <- (-1x49x1024xf32, 1024xf32) + add_180 = paddle._C_ops.add(matmul_146, parameter_4) + del matmul_146, parameter_4 + + # pd_op.add: (-1x49x1024xf32) <- (-1x49x1024xf32, -1x49x1024xf32) + add_181 = paddle._C_ops.add(add_178, add_180) + del add_178, add_180 + + # pd_op.layer_norm: (-1x49x1024xf32, -1x49xf32, -1x49xf32) <- (-1x49x1024xf32, 1024xf32, 1024xf32) + layer_norm_156, layer_norm_157, layer_norm_158 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_181, parameter_3, parameter_2, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_181, parameter_2, parameter_3 + + # pd_op.transpose: (-1x1024x49xf32) <- (-1x49x1024xf32) + transpose_157 = paddle._C_ops.transpose(layer_norm_156, [0, 2, 1]) + del layer_norm_156 + + # pd_op.unsqueeze: (-1x1024x1x49xf32) <- (-1x1024x49xf32, 1xi64) + unsqueeze_72 = paddle._C_ops.unsqueeze(transpose_157, full_int_array_5) + del transpose_157 + + # pd_op.pool2d: (-1x1024x1x1xf32) <- (-1x1024x1x49xf32, 2xi64) + pool2d_0 = paddle._C_ops.pool2d( + unsqueeze_72, + full_int_array_14, + [1, 1], + [0, 0], + False, + True, + "NCHW", + "avg", + False, + True, + "EXPLICIT", + ) + del full_int_array_14, unsqueeze_72 + + # pd_op.squeeze: (-1x1024x1xf32) <- (-1x1024x1x1xf32, 1xi64) + squeeze_0 = paddle._C_ops.squeeze(pool2d_0, full_int_array_5) + del full_int_array_5, pool2d_0 + + # pd_op.flatten: (-1x1024xf32) <- (-1x1024x1xf32) + flatten_1 = paddle._C_ops.flatten(squeeze_0, 1, 2) + del squeeze_0 + + # pd_op.matmul: (-1x102xf32) <- (-1x1024xf32, 1024x102xf32) + matmul_147 = paddle._C_ops.matmul(flatten_1, parameter_1, False, False) + del flatten_1, parameter_1 + + # pd_op.add: (-1x102xf32) <- (-1x102xf32, 102xf32) + add_0 = paddle._C_ops.add(matmul_147, parameter_0) + del matmul_147, parameter_0 + + return ( + add_0, + set_value__0, + set_value__1, + set_value__2, + set_value__3, + set_value__4, + set_value__5, + set_value__6, + set_value__7, + set_value__8, + set_value__9, + set_value__10, + set_value__11, + ) diff --git a/paddle_samples/vision-model/SwinTransformer_base_patch4_window7_224/subgraph_1/weight_meta.py b/paddle_samples/vision-model/SwinTransformer_base_patch4_window7_224/subgraph_1/weight_meta.py new file mode 100644 index 00000000..69d1deec --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_base_patch4_window7_224/subgraph_1/weight_meta.py @@ -0,0 +1,2743 @@ +class Program_weight_tensor_parameter_0: + name = "parameter_0" + shape = [102] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_1: + name = "parameter_1" + shape = [1024, 102] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_2: + name = "parameter_2" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_3: + name = "parameter_3" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_4: + name = "parameter_4" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_5: + name = "parameter_5" + shape = [4096, 1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_6: + name = "parameter_6" + shape = [4096] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_7: + name = "parameter_7" + shape = [1024, 4096] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_8: + name = "parameter_8" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_9: + name = "parameter_9" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_10: + name = "parameter_10" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_11: + name = "parameter_11" + shape = [1024, 1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_12: + name = "parameter_12" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_13: + name = "parameter_13" + shape = [1024, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_14: + name = "parameter_14" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_15: + name = "parameter_15" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_16: + name = "parameter_16" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_17: + name = "parameter_17" + shape = [4096, 1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_18: + name = "parameter_18" + shape = [4096] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_19: + name = "parameter_19" + shape = [1024, 4096] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_20: + name = "parameter_20" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_21: + name = "parameter_21" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_22: + name = "parameter_22" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_23: + name = "parameter_23" + shape = [1024, 1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_24: + name = "parameter_24" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_25: + name = "parameter_25" + shape = [1024, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_26: + name = "parameter_26" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_27: + name = "parameter_27" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_28: + name = "parameter_28" + shape = [2048, 1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_29: + name = "parameter_29" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_30: + name = "parameter_30" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_31: + name = "parameter_31" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_32: + name = "parameter_32" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_33: + name = "parameter_33" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_34: + name = "parameter_34" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_35: + name = "parameter_35" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_36: + name = "parameter_36" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_37: + name = "parameter_37" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_38: + name = "parameter_38" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_39: + name = "parameter_39" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_40: + name = "parameter_40" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_41: + name = "parameter_41" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_42: + name = "parameter_42" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_43: + name = "parameter_43" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_44: + name = "parameter_44" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_45: + name = "parameter_45" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_46: + name = "parameter_46" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_47: + name = "parameter_47" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_48: + name = "parameter_48" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_49: + name = "parameter_49" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_50: + name = "parameter_50" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_51: + name = "parameter_51" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_52: + name = "parameter_52" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_53: + name = "parameter_53" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_54: + name = "parameter_54" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_55: + name = "parameter_55" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_56: + name = "parameter_56" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_57: + name = "parameter_57" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_58: + name = "parameter_58" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_59: + name = "parameter_59" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_60: + name = "parameter_60" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_61: + name = "parameter_61" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_62: + name = "parameter_62" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_63: + name = "parameter_63" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_64: + name = "parameter_64" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_65: + name = "parameter_65" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_66: + name = "parameter_66" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_67: + name = "parameter_67" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_68: + name = "parameter_68" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_69: + name = "parameter_69" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_70: + name = "parameter_70" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_71: + name = "parameter_71" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_72: + name = "parameter_72" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_73: + name = "parameter_73" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_74: + name = "parameter_74" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_75: + name = "parameter_75" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_76: + name = "parameter_76" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_77: + name = "parameter_77" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_78: + name = "parameter_78" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_79: + name = "parameter_79" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_80: + name = "parameter_80" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_81: + name = "parameter_81" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_82: + name = "parameter_82" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_83: + name = "parameter_83" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_84: + name = "parameter_84" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_85: + name = "parameter_85" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_86: + name = "parameter_86" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_87: + name = "parameter_87" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_88: + name = "parameter_88" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_89: + name = "parameter_89" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_90: + name = "parameter_90" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_91: + name = "parameter_91" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_92: + name = "parameter_92" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_93: + name = "parameter_93" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_94: + name = "parameter_94" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_95: + name = "parameter_95" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_96: + name = "parameter_96" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_97: + name = "parameter_97" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_98: + name = "parameter_98" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_99: + name = "parameter_99" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_100: + name = "parameter_100" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_101: + name = "parameter_101" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_102: + name = "parameter_102" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_103: + name = "parameter_103" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_104: + name = "parameter_104" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_105: + name = "parameter_105" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_106: + name = "parameter_106" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_107: + name = "parameter_107" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_108: + name = "parameter_108" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_109: + name = "parameter_109" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_110: + name = "parameter_110" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_111: + name = "parameter_111" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_112: + name = "parameter_112" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_113: + name = "parameter_113" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_114: + name = "parameter_114" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_115: + name = "parameter_115" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_116: + name = "parameter_116" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_117: + name = "parameter_117" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_118: + name = "parameter_118" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_119: + name = "parameter_119" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_120: + name = "parameter_120" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_121: + name = "parameter_121" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_122: + name = "parameter_122" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_123: + name = "parameter_123" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_124: + name = "parameter_124" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_125: + name = "parameter_125" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_126: + name = "parameter_126" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_127: + name = "parameter_127" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_128: + name = "parameter_128" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_129: + name = "parameter_129" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_130: + name = "parameter_130" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_131: + name = "parameter_131" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_132: + name = "parameter_132" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_133: + name = "parameter_133" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_134: + name = "parameter_134" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_135: + name = "parameter_135" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_136: + name = "parameter_136" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_137: + name = "parameter_137" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_138: + name = "parameter_138" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_139: + name = "parameter_139" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_140: + name = "parameter_140" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_141: + name = "parameter_141" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_142: + name = "parameter_142" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_143: + name = "parameter_143" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_144: + name = "parameter_144" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_145: + name = "parameter_145" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_146: + name = "parameter_146" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_147: + name = "parameter_147" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_148: + name = "parameter_148" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_149: + name = "parameter_149" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_150: + name = "parameter_150" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_151: + name = "parameter_151" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_152: + name = "parameter_152" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_153: + name = "parameter_153" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_154: + name = "parameter_154" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_155: + name = "parameter_155" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_156: + name = "parameter_156" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_157: + name = "parameter_157" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_158: + name = "parameter_158" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_159: + name = "parameter_159" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_160: + name = "parameter_160" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_161: + name = "parameter_161" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_162: + name = "parameter_162" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_163: + name = "parameter_163" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_164: + name = "parameter_164" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_165: + name = "parameter_165" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_166: + name = "parameter_166" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_167: + name = "parameter_167" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_168: + name = "parameter_168" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_169: + name = "parameter_169" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_170: + name = "parameter_170" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_171: + name = "parameter_171" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_172: + name = "parameter_172" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_173: + name = "parameter_173" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_174: + name = "parameter_174" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_175: + name = "parameter_175" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_176: + name = "parameter_176" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_177: + name = "parameter_177" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_178: + name = "parameter_178" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_179: + name = "parameter_179" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_180: + name = "parameter_180" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_181: + name = "parameter_181" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_182: + name = "parameter_182" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_183: + name = "parameter_183" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_184: + name = "parameter_184" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_185: + name = "parameter_185" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_186: + name = "parameter_186" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_187: + name = "parameter_187" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_188: + name = "parameter_188" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_189: + name = "parameter_189" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_190: + name = "parameter_190" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_191: + name = "parameter_191" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_192: + name = "parameter_192" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_193: + name = "parameter_193" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_194: + name = "parameter_194" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_195: + name = "parameter_195" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_196: + name = "parameter_196" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_197: + name = "parameter_197" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_198: + name = "parameter_198" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_199: + name = "parameter_199" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_200: + name = "parameter_200" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_201: + name = "parameter_201" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_202: + name = "parameter_202" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_203: + name = "parameter_203" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_204: + name = "parameter_204" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_205: + name = "parameter_205" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_206: + name = "parameter_206" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_207: + name = "parameter_207" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_208: + name = "parameter_208" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_209: + name = "parameter_209" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_210: + name = "parameter_210" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_211: + name = "parameter_211" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_212: + name = "parameter_212" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_213: + name = "parameter_213" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_214: + name = "parameter_214" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_215: + name = "parameter_215" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_216: + name = "parameter_216" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_217: + name = "parameter_217" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_218: + name = "parameter_218" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_219: + name = "parameter_219" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_220: + name = "parameter_220" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_221: + name = "parameter_221" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_222: + name = "parameter_222" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_223: + name = "parameter_223" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_224: + name = "parameter_224" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_225: + name = "parameter_225" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_226: + name = "parameter_226" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_227: + name = "parameter_227" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_228: + name = "parameter_228" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_229: + name = "parameter_229" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_230: + name = "parameter_230" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_231: + name = "parameter_231" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_232: + name = "parameter_232" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_233: + name = "parameter_233" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_234: + name = "parameter_234" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_235: + name = "parameter_235" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_236: + name = "parameter_236" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_237: + name = "parameter_237" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_238: + name = "parameter_238" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_239: + name = "parameter_239" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_240: + name = "parameter_240" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_241: + name = "parameter_241" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_242: + name = "parameter_242" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_243: + name = "parameter_243" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_244: + name = "parameter_244" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_245: + name = "parameter_245" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_246: + name = "parameter_246" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_247: + name = "parameter_247" + shape = [1024, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_248: + name = "parameter_248" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_249: + name = "parameter_249" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_250: + name = "parameter_250" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_251: + name = "parameter_251" + shape = [1024, 256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_252: + name = "parameter_252" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_253: + name = "parameter_253" + shape = [256, 1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_254: + name = "parameter_254" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_255: + name = "parameter_255" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_256: + name = "parameter_256" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_257: + name = "parameter_257" + shape = [256, 256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_258: + name = "parameter_258" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_259: + name = "parameter_259" + shape = [256, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_260: + name = "parameter_260" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_261: + name = "parameter_261" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_262: + name = "parameter_262" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_263: + name = "parameter_263" + shape = [1024, 256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_264: + name = "parameter_264" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_265: + name = "parameter_265" + shape = [256, 1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_266: + name = "parameter_266" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_267: + name = "parameter_267" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_268: + name = "parameter_268" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_269: + name = "parameter_269" + shape = [256, 256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_270: + name = "parameter_270" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_271: + name = "parameter_271" + shape = [256, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_272: + name = "parameter_272" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_273: + name = "parameter_273" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_274: + name = "parameter_274" + shape = [512, 256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_275: + name = "parameter_275" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_276: + name = "parameter_276" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_277: + name = "parameter_277" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_278: + name = "parameter_278" + shape = [512, 128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_279: + name = "parameter_279" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_280: + name = "parameter_280" + shape = [128, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_281: + name = "parameter_281" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_282: + name = "parameter_282" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_283: + name = "parameter_283" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_284: + name = "parameter_284" + shape = [128, 128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_285: + name = "parameter_285" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_286: + name = "parameter_286" + shape = [128, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_287: + name = "parameter_287" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_288: + name = "parameter_288" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_289: + name = "parameter_289" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_290: + name = "parameter_290" + shape = [512, 128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_291: + name = "parameter_291" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_292: + name = "parameter_292" + shape = [128, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_293: + name = "parameter_293" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_294: + name = "parameter_294" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_295: + name = "parameter_295" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_296: + name = "parameter_296" + shape = [128, 128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_297: + name = "parameter_297" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_298: + name = "parameter_298" + shape = [128, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_299: + name = "parameter_299" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_300: + name = "parameter_300" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_301: + name = "parameter_301" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_302: + name = "parameter_302" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_303: + name = "parameter_303" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_304: + name = "parameter_304" + shape = [128, 3, 4, 4] + dtype = "float32" + low = 0 + high = 0.5 + data = None diff --git a/paddle_samples/vision-model/SwinTransformer_base_patch4_window7_224/subgraph_2/graph_hash.txt b/paddle_samples/vision-model/SwinTransformer_base_patch4_window7_224/subgraph_2/graph_hash.txt new file mode 100644 index 00000000..678a18a1 --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_base_patch4_window7_224/subgraph_2/graph_hash.txt @@ -0,0 +1 @@ +b30ca9883649d991bc8c4ba04da935a9b4b73a36613bcc8529ef9a0a2fb8b1d3 \ No newline at end of file diff --git a/paddle_samples/vision-model/SwinTransformer_base_patch4_window7_224/subgraph_2/graph_net.json b/paddle_samples/vision-model/SwinTransformer_base_patch4_window7_224/subgraph_2/graph_net.json new file mode 100644 index 00000000..368ac6fc --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_base_patch4_window7_224/subgraph_2/graph_net.json @@ -0,0 +1,5 @@ +{ + "framework": "paddle", + "num_devices_required": 1, + "num_nodes_required": 1 +} \ No newline at end of file diff --git a/paddle_samples/vision-model/SwinTransformer_base_patch4_window7_224/subgraph_2/input_meta.py b/paddle_samples/vision-model/SwinTransformer_base_patch4_window7_224/subgraph_2/input_meta.py new file mode 100644 index 00000000..75ce51a3 --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_base_patch4_window7_224/subgraph_2/input_meta.py @@ -0,0 +1,439 @@ +class Program_weight_tensor_data_0: + name = "data_0" + shape = [16, 3, 224, 224] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_1: + name = "data_1" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_2: + name = "data_2" + shape = [169, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_3: + name = "data_3" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_4: + name = "data_4" + shape = [169, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_5: + name = "data_5" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_6: + name = "data_6" + shape = [169, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_7: + name = "data_7" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_8: + name = "data_8" + shape = [169, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_9: + name = "data_9" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_10: + name = "data_10" + shape = [169, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_11: + name = "data_11" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_12: + name = "data_12" + shape = [169, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_13: + name = "data_13" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_14: + name = "data_14" + shape = [169, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_15: + name = "data_15" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_16: + name = "data_16" + shape = [169, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_17: + name = "data_17" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_18: + name = "data_18" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_19: + name = "data_19" + shape = [169, 32] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_20: + name = "data_20" + shape = [169, 8] + dtype = "float32" + low = -6.02162 + high = 7.45984 + data = None + + +class Program_weight_tensor_data_21: + name = "data_21" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_22: + name = "data_22" + shape = [169, 32] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_23: + name = "data_23" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_24: + name = "data_24" + shape = [169, 4] + dtype = "float32" + low = -8.34796 + high = 5.14563 + data = None + + +class Program_weight_tensor_data_25: + name = "data_25" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_26: + name = "data_26" + shape = [169, 8] + dtype = "float32" + low = -7.64378 + high = 5.6449 + data = None + + +class Program_weight_tensor_data_27: + name = "data_27" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_28: + name = "data_28" + shape = [169, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_29: + name = "data_29" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_30: + name = "data_30" + shape = [169, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_31: + name = "data_31" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_32: + name = "data_32" + shape = [169, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_33: + name = "data_33" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_34: + name = "data_34" + shape = [169, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_35: + name = "data_35" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_36: + name = "data_36" + shape = [169, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_37: + name = "data_37" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_38: + name = "data_38" + shape = [169, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_39: + name = "data_39" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_40: + name = "data_40" + shape = [169, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_41: + name = "data_41" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_42: + name = "data_42" + shape = [169, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_43: + name = "data_43" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_44: + name = "data_44" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_45: + name = "data_45" + shape = [169, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_46: + name = "data_46" + shape = [169, 4] + dtype = "float32" + low = -6.26646 + high = 5.23088 + data = None + + +class Program_weight_tensor_data_47: + name = "data_47" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_48: + name = "data_48" + shape = [169, 16] + dtype = "float32" + low = 0 + high = 0.5 + data = None diff --git a/paddle_samples/vision-model/SwinTransformer_base_patch4_window7_224/subgraph_2/model.py b/paddle_samples/vision-model/SwinTransformer_base_patch4_window7_224/subgraph_2/model.py new file mode 100644 index 00000000..2a68cd79 --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_base_patch4_window7_224/subgraph_2/model.py @@ -0,0 +1,10708 @@ +import paddle + + +class GraphModule(paddle.nn.Layer): + def __init__(self): + super().__init__() + + def forward( + self, + parameter_0, + parameter_1, + parameter_2, + parameter_3, + parameter_4, + parameter_5, + parameter_6, + parameter_7, + parameter_8, + parameter_9, + parameter_10, + parameter_11, + parameter_12, + parameter_13, + parameter_14, + parameter_15, + parameter_16, + parameter_17, + parameter_18, + parameter_19, + parameter_20, + parameter_21, + parameter_22, + parameter_23, + parameter_24, + parameter_25, + parameter_26, + parameter_27, + parameter_28, + parameter_29, + parameter_30, + parameter_31, + parameter_32, + parameter_33, + parameter_34, + parameter_35, + parameter_36, + parameter_37, + parameter_38, + parameter_39, + parameter_40, + parameter_41, + parameter_42, + parameter_43, + parameter_44, + parameter_45, + parameter_46, + parameter_47, + parameter_48, + parameter_49, + parameter_50, + parameter_51, + parameter_52, + parameter_53, + parameter_54, + parameter_55, + parameter_56, + parameter_57, + parameter_58, + parameter_59, + parameter_60, + parameter_61, + parameter_62, + parameter_63, + parameter_64, + parameter_65, + parameter_66, + parameter_67, + parameter_68, + parameter_69, + parameter_70, + parameter_71, + parameter_72, + parameter_73, + parameter_74, + parameter_75, + parameter_76, + parameter_77, + parameter_78, + parameter_79, + parameter_80, + parameter_81, + parameter_82, + parameter_83, + parameter_84, + parameter_85, + parameter_86, + parameter_87, + parameter_88, + parameter_89, + parameter_90, + parameter_91, + parameter_92, + parameter_93, + parameter_94, + parameter_95, + parameter_96, + parameter_97, + parameter_98, + parameter_99, + parameter_100, + parameter_101, + parameter_102, + parameter_103, + parameter_104, + parameter_105, + parameter_106, + parameter_107, + parameter_108, + parameter_109, + parameter_110, + parameter_111, + parameter_112, + parameter_113, + parameter_114, + parameter_115, + parameter_116, + parameter_117, + parameter_118, + parameter_119, + parameter_120, + parameter_121, + parameter_122, + parameter_123, + parameter_124, + parameter_125, + parameter_126, + parameter_127, + parameter_128, + parameter_129, + parameter_130, + parameter_131, + parameter_132, + parameter_133, + parameter_134, + parameter_135, + parameter_136, + parameter_137, + parameter_138, + parameter_139, + parameter_140, + parameter_141, + parameter_142, + parameter_143, + parameter_144, + parameter_145, + parameter_146, + parameter_147, + parameter_148, + parameter_149, + parameter_150, + parameter_151, + parameter_152, + parameter_153, + parameter_154, + parameter_155, + parameter_156, + parameter_157, + parameter_158, + parameter_159, + parameter_160, + parameter_161, + parameter_162, + parameter_163, + parameter_164, + parameter_165, + parameter_166, + parameter_167, + parameter_168, + parameter_169, + parameter_170, + parameter_171, + parameter_172, + parameter_173, + parameter_174, + parameter_175, + parameter_176, + parameter_177, + parameter_178, + parameter_179, + parameter_180, + parameter_181, + parameter_182, + parameter_183, + parameter_184, + parameter_185, + parameter_186, + parameter_187, + parameter_188, + parameter_189, + parameter_190, + parameter_191, + parameter_192, + parameter_193, + parameter_194, + parameter_195, + parameter_196, + parameter_197, + parameter_198, + parameter_199, + parameter_200, + parameter_201, + parameter_202, + parameter_203, + parameter_204, + parameter_205, + parameter_206, + parameter_207, + parameter_208, + parameter_209, + parameter_210, + parameter_211, + parameter_212, + parameter_213, + parameter_214, + parameter_215, + parameter_216, + parameter_217, + parameter_218, + parameter_219, + parameter_220, + parameter_221, + parameter_222, + parameter_223, + parameter_224, + parameter_225, + parameter_226, + parameter_227, + parameter_228, + parameter_229, + parameter_230, + parameter_231, + parameter_232, + parameter_233, + parameter_234, + parameter_235, + parameter_236, + parameter_237, + parameter_238, + parameter_239, + parameter_240, + parameter_241, + parameter_242, + parameter_243, + parameter_244, + parameter_245, + parameter_246, + parameter_247, + parameter_248, + parameter_249, + parameter_250, + parameter_251, + parameter_252, + parameter_253, + parameter_254, + parameter_255, + parameter_256, + parameter_257, + parameter_258, + parameter_259, + parameter_260, + parameter_261, + parameter_262, + parameter_263, + parameter_264, + parameter_265, + parameter_266, + parameter_267, + parameter_268, + parameter_269, + parameter_270, + parameter_271, + parameter_272, + parameter_273, + parameter_274, + parameter_275, + parameter_276, + parameter_277, + parameter_278, + parameter_279, + parameter_280, + parameter_281, + parameter_282, + parameter_283, + parameter_284, + parameter_285, + parameter_286, + parameter_287, + parameter_288, + parameter_289, + parameter_290, + parameter_291, + parameter_292, + parameter_293, + parameter_294, + parameter_295, + parameter_296, + parameter_297, + parameter_298, + parameter_299, + parameter_300, + parameter_301, + parameter_302, + parameter_303, + parameter_304, + data_0, + data_1, + data_2, + data_3, + data_4, + data_5, + data_6, + data_7, + data_8, + data_9, + data_10, + data_11, + data_12, + data_13, + data_14, + data_15, + data_16, + data_17, + data_18, + data_19, + data_20, + data_21, + data_22, + data_23, + data_24, + data_25, + data_26, + data_27, + data_28, + data_29, + data_30, + data_31, + data_32, + data_33, + data_34, + data_35, + data_36, + data_37, + data_38, + data_39, + data_40, + data_41, + data_42, + data_43, + data_44, + data_45, + data_46, + data_47, + data_48, + ): + # pd_op.conv2d: (16x128x56x56xf32) <- (16x3x224x224xf32, 128x3x4x4xf32) + conv2d_0 = paddle._C_ops.conv2d( + data_0, parameter_304, [4, 4], [0, 0], "EXPLICIT", [1, 1], 1, "NCHW" + ) + del data_0, parameter_304 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_9 = [1, -1, 1, 1] + + # pd_op.reshape: (1x128x1x1xf32) <- (128xf32, 4xi64) + reshape_0 = paddle._C_ops.reshape(parameter_303, full_int_array_9) + del full_int_array_9, parameter_303 + + # pd_op.add: (16x128x56x56xf32) <- (16x128x56x56xf32, 1x128x1x1xf32) + add_0 = paddle._C_ops.add(conv2d_0, reshape_0) + + # pd_op.flatten: (16x128x3136xf32) <- (16x128x56x56xf32) + flatten_1 = paddle._C_ops.flatten(add_0, 2, 3) + + # pd_op.transpose: (16x3136x128xf32) <- (16x128x3136xf32) + transpose_0 = paddle._C_ops.transpose(flatten_1, [0, 2, 1]) + del flatten_1 + + # pd_op.layer_norm: (16x3136x128xf32, 16x3136xf32, 16x3136xf32) <- (16x3136x128xf32, 128xf32, 128xf32) + layer_norm_0, layer_norm_1, layer_norm_2 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + transpose_0, parameter_302, parameter_301, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_301, parameter_302 + + # pd_op.layer_norm: (16x3136x128xf32, 16x3136xf32, 16x3136xf32) <- (16x3136x128xf32, 128xf32, 128xf32) + layer_norm_3, layer_norm_4, layer_norm_5 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + layer_norm_0, parameter_300, parameter_299, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_299, parameter_300 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_10 = [16, 56, 56, 128] + + # pd_op.reshape: (16x56x56x128xf32) <- (16x3136x128xf32, 4xi64) + reshape_1 = paddle._C_ops.reshape(layer_norm_3, full_int_array_10) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_11 = [16, 8, 7, 8, 7, 128] + + # pd_op.reshape: (16x8x7x8x7x128xf32) <- (16x56x56x128xf32, 6xi64) + reshape_211 = paddle._C_ops.reshape(reshape_1, full_int_array_11) + + # pd_op.transpose: (16x8x8x7x7x128xf32) <- (16x8x7x8x7x128xf32) + transpose_1 = paddle._C_ops.transpose(reshape_211, [0, 1, 3, 2, 4, 5]) + del reshape_211 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_12 = [-1, 7, 7, 128] + + # pd_op.reshape: (1024x7x7x128xf32) <- (16x8x8x7x7x128xf32, 4xi64) + reshape_2 = paddle._C_ops.reshape(transpose_1, full_int_array_12) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_13 = [-1, 49, 128] + + # pd_op.reshape: (1024x49x128xf32) <- (1024x7x7x128xf32, 3xi64) + reshape_3 = paddle._C_ops.reshape(reshape_2, full_int_array_13) + + # pd_op.matmul: (1024x49x384xf32) <- (1024x49x128xf32, 128x384xf32) + matmul_0 = paddle._C_ops.matmul(reshape_3, parameter_298, False, False) + del parameter_298 + + # pd_op.add: (1024x49x384xf32) <- (1024x49x384xf32, 384xf32) + add_1 = paddle._C_ops.add(matmul_0, parameter_297) + del parameter_297 + + # pd_op.full_int_array: (5xi64) <- () + full_int_array_14 = [1024, 49, 3, 4, 32] + + # pd_op.reshape: (1024x49x3x4x32xf32) <- (1024x49x384xf32, 5xi64) + reshape_212 = paddle._C_ops.reshape(add_1, full_int_array_14) + + # pd_op.transpose: (3x1024x4x49x32xf32) <- (1024x49x3x4x32xf32) + transpose_2 = paddle._C_ops.transpose(reshape_212, [2, 0, 3, 1, 4]) + del reshape_212 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_0 = [0] + + # pd_op.assign: (1xi64) <- (1xi64) + assign_264 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_257 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_254 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_247 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_231 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_224 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_221 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_214 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_211 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_204 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_201 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_194 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_191 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_184 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_181 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_174 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_171 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_164 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_161 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_154 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_151 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_144 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_141 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_134 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_131 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_124 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_121 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_114 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_111 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_104 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_101 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_94 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_91 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_84 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_81 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_74 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_71 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_64 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_61 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_54 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_38 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_31 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_28 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_21 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_10 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_3 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_2 = full_int_array_0 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_1 = [1] + + # pd_op.assign: (1xi64) <- (1xi64) + assign_259 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_258 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_249 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_248 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_226 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_225 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_216 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_215 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_206 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_205 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_196 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_195 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_186 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_185 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_176 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_175 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_166 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_165 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_156 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_155 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_146 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_145 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_136 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_135 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_126 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_125 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_116 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_115 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_106 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_105 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_96 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_95 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_86 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_85 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_76 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_75 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_66 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_65 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_56 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_55 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_33 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_32 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_23 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_22 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_5 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_4 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_0 = full_int_array_1 + + # pd_op.slice: (1024x4x49x32xf32) <- (3x1024x4x49x32xf32, 1xi64, 1xi64) + slice_24 = paddle._C_ops.slice( + transpose_2, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_2 = [2] + + # pd_op.assign: (1xi64) <- (1xi64) + assign_269 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_267 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_261 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_260 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_251 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_250 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_228 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_227 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_218 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_217 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_208 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_207 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_198 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_197 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_188 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_187 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_178 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_177 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_168 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_167 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_158 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_157 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_148 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_147 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_138 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_137 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_128 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_127 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_118 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_117 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_108 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_107 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_98 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_97 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_88 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_87 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_78 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_77 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_68 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_67 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_58 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_57 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_35 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_34 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_25 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_24 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_7 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_6 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_1 = full_int_array_2 + + # pd_op.slice: (1024x4x49x32xf32) <- (3x1024x4x49x32xf32, 1xi64, 1xi64) + slice_25 = paddle._C_ops.slice( + transpose_2, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_3 = [3] + + # pd_op.assign: (1xi64) <- (1xi64) + assign_262 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_252 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_229 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_219 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_209 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_199 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_189 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_179 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_169 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_159 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_149 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_139 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_129 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_119 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_109 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_99 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_89 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_79 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_69 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_59 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_36 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_26 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_8 = full_int_array_3 + + # pd_op.slice: (1024x4x49x32xf32) <- (3x1024x4x49x32xf32, 1xi64, 1xi64) + slice_0 = paddle._C_ops.slice( + transpose_2, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.full: (1xf32) <- () + full_0 = paddle._C_ops.full( + [1], float("0.176777"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.assign: (1xf32) <- (1xf32) + assign_263 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_253 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_230 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_220 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_210 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_200 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_190 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_180 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_170 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_160 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_150 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_140 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_130 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_120 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_110 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_100 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_90 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_80 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_70 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_60 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_37 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_27 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_9 = full_0 + + # pd_op.scale: (1024x4x49x32xf32) <- (1024x4x49x32xf32, 1xf32) + scale_0 = paddle._C_ops.scale(slice_24, full_0, float("0"), True) + del slice_24 + + # pd_op.transpose: (1024x4x32x49xf32) <- (1024x4x49x32xf32) + transpose_3 = paddle._C_ops.transpose(slice_25, [0, 1, 3, 2]) + del slice_25 + + # pd_op.matmul: (1024x4x49x49xf32) <- (1024x4x49x32xf32, 1024x4x32x49xf32) + matmul_1 = paddle._C_ops.matmul(scale_0, transpose_3, False, False) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_15 = [-1] + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_4 = paddle._C_ops.reshape(data_23, full_int_array_15) + del data_23 + + # pd_op.index_select: (2401x4xf32) <- (169x4xf32, 2401xi64) + index_select_0 = paddle._C_ops.index_select(data_24, reshape_4, 0) + del data_24 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_16 = [49, 49, -1] + + # pd_op.reshape: (49x49x4xf32) <- (2401x4xf32, 3xi64) + reshape_213 = paddle._C_ops.reshape(index_select_0, full_int_array_16) + + # pd_op.transpose: (4x49x49xf32) <- (49x49x4xf32) + transpose_4 = paddle._C_ops.transpose(reshape_213, [2, 0, 1]) + del reshape_213 + + # pd_op.unsqueeze: (1x4x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_0 = paddle._C_ops.unsqueeze(transpose_4, full_int_array_0) + + # pd_op.add: (1024x4x49x49xf32) <- (1024x4x49x49xf32, 1x4x49x49xf32) + add_170 = paddle._C_ops.add(matmul_1, unsqueeze_0) + + # pd_op.softmax: (1024x4x49x49xf32) <- (1024x4x49x49xf32) + softmax_0 = paddle._C_ops.softmax(add_170, -1) + del add_170 + + # pd_op.matmul: (1024x4x49x32xf32) <- (1024x4x49x49xf32, 1024x4x49x32xf32) + matmul_124 = paddle._C_ops.matmul(softmax_0, slice_0, False, False) + + # pd_op.transpose: (1024x49x4x32xf32) <- (1024x4x49x32xf32) + transpose_5 = paddle._C_ops.transpose(matmul_124, [0, 2, 1, 3]) + del matmul_124 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_17 = [1024, 49, 128] + + # pd_op.reshape: (1024x49x128xf32) <- (1024x49x4x32xf32, 3xi64) + reshape_5 = paddle._C_ops.reshape(transpose_5, full_int_array_17) + + # pd_op.matmul: (1024x49x128xf32) <- (1024x49x128xf32, 128x128xf32) + matmul_2 = paddle._C_ops.matmul(reshape_5, parameter_296, False, False) + del parameter_296 + + # pd_op.add: (1024x49x128xf32) <- (1024x49x128xf32, 128xf32) + add_2 = paddle._C_ops.add(matmul_2, parameter_295) + del parameter_295 + + # pd_op.reshape: (1024x7x7x128xf32) <- (1024x49x128xf32, 4xi64) + reshape_6 = paddle._C_ops.reshape(add_2, full_int_array_12) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_18 = [-1, 8, 8, 7, 7, 128] + + # pd_op.reshape: (16x8x8x7x7x128xf32) <- (1024x7x7x128xf32, 6xi64) + reshape_214 = paddle._C_ops.reshape(reshape_6, full_int_array_18) + + # pd_op.transpose: (16x8x7x8x7x128xf32) <- (16x8x8x7x7x128xf32) + transpose_6 = paddle._C_ops.transpose(reshape_214, [0, 1, 3, 2, 4, 5]) + del reshape_214 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_19 = [-1, 56, 56, 128] + + # pd_op.reshape: (16x56x56x128xf32) <- (16x8x7x8x7x128xf32, 4xi64) + reshape_7 = paddle._C_ops.reshape(transpose_6, full_int_array_19) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_20 = [16, 3136, 128] + + # pd_op.reshape: (16x3136x128xf32) <- (16x56x56x128xf32, 3xi64) + reshape_8 = paddle._C_ops.reshape(reshape_7, full_int_array_20) + + # pd_op.add: (16x3136x128xf32) <- (16x3136x128xf32, 16x3136x128xf32) + add_3 = paddle._C_ops.add(layer_norm_0, reshape_8) + + # pd_op.layer_norm: (16x3136x128xf32, 16x3136xf32, 16x3136xf32) <- (16x3136x128xf32, 128xf32, 128xf32) + layer_norm_6, layer_norm_7, layer_norm_8 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_3, parameter_294, parameter_293, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_293, parameter_294 + + # pd_op.matmul: (16x3136x512xf32) <- (16x3136x128xf32, 128x512xf32) + matmul_3 = paddle._C_ops.matmul(layer_norm_6, parameter_292, False, False) + del parameter_292 + + # pd_op.add: (16x3136x512xf32) <- (16x3136x512xf32, 512xf32) + add_4 = paddle._C_ops.add(matmul_3, parameter_291) + del parameter_291 + + # pd_op.gelu: (16x3136x512xf32) <- (16x3136x512xf32) + gelu_0 = paddle._C_ops.gelu(add_4, False) + + # pd_op.matmul: (16x3136x128xf32) <- (16x3136x512xf32, 512x128xf32) + matmul_4 = paddle._C_ops.matmul(gelu_0, parameter_290, False, False) + del parameter_290 + + # pd_op.add: (16x3136x128xf32) <- (16x3136x128xf32, 128xf32) + add_5 = paddle._C_ops.add(matmul_4, parameter_289) + del parameter_289 + + # pd_op.add: (16x3136x128xf32) <- (16x3136x128xf32, 16x3136x128xf32) + add_6 = paddle._C_ops.add(add_3, add_5) + + # pd_op.layer_norm: (16x3136x128xf32, 16x3136xf32, 16x3136xf32) <- (16x3136x128xf32, 128xf32, 128xf32) + layer_norm_9, layer_norm_10, layer_norm_11 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_6, parameter_288, parameter_287, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_287, parameter_288 + + # pd_op.reshape: (16x56x56x128xf32) <- (16x3136x128xf32, 4xi64) + reshape_9 = paddle._C_ops.reshape(layer_norm_9, full_int_array_10) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_4 = [-3, -3] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_256 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_223 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_203 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_183 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_163 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_143 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_123 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_103 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_83 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_63 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_30 = full_int_array_4 + + # pd_op.roll: (16x56x56x128xf32) <- (16x56x56x128xf32, 2xi64) + roll_0 = paddle._C_ops.roll(reshape_9, full_int_array_4, [1, 2]) + + # pd_op.reshape: (16x8x7x8x7x128xf32) <- (16x56x56x128xf32, 6xi64) + reshape_215 = paddle._C_ops.reshape(roll_0, full_int_array_11) + del full_int_array_11 + + # pd_op.transpose: (16x8x8x7x7x128xf32) <- (16x8x7x8x7x128xf32) + transpose_7 = paddle._C_ops.transpose(reshape_215, [0, 1, 3, 2, 4, 5]) + del reshape_215 + + # pd_op.reshape: (1024x7x7x128xf32) <- (16x8x8x7x7x128xf32, 4xi64) + reshape_10 = paddle._C_ops.reshape(transpose_7, full_int_array_12) + + # pd_op.reshape: (1024x49x128xf32) <- (1024x7x7x128xf32, 3xi64) + reshape_11 = paddle._C_ops.reshape(reshape_10, full_int_array_13) + del full_int_array_13 + + # pd_op.full: (1x56x56x1xf32) <- () + full_25 = paddle._C_ops.full( + [1, 56, 56, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_21 = [0, 0] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_234 = full_int_array_21 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_41 = full_int_array_21 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_12 = full_int_array_21 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_22 = [-7, -7] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_23 = [1, 1] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_268 = full_int_array_23 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_243 = full_int_array_23 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_50 = full_int_array_23 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_18 = full_int_array_23 + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__12 = paddle._C_ops.set_value_( + full_25, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_25 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_24 = [0, -7] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_25 = [-7, -3] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__13 = paddle._C_ops.set_value_( + set_value__12, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__12 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_26 = [0, -3] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_27 = [-7, 2147483647] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__14 = paddle._C_ops.set_value_( + set_value__13, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__13 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_28 = [-7, 0] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_29 = [-3, -7] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__15 = paddle._C_ops.set_value_( + set_value__14, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__14 + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__16 = paddle._C_ops.set_value_( + set_value__15, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__15 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_30 = [-3, 2147483647] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__17 = paddle._C_ops.set_value_( + set_value__16, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__16 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_31 = [-3, 0] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_32 = [2147483647, -7] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__18 = paddle._C_ops.set_value_( + set_value__17, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__17 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_33 = [2147483647, -3] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__19 = paddle._C_ops.set_value_( + set_value__18, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__18 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_34 = [2147483647, 2147483647] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_244 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_241 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_238 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_235 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_51 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_48 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_45 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_42 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_19 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_16 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_14 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_13 = full_int_array_34 + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__0 = paddle._C_ops.set_value_( + set_value__19, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__19 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_35 = [1, 8, 7, 8, 7, 1] + + # pd_op.reshape: (1x8x7x8x7x1xf32) <- (1x56x56x1xf32, 6xi64) + reshape_216 = paddle._C_ops.reshape(set_value__0, full_int_array_35) + del full_int_array_35 + + # pd_op.transpose: (1x8x8x7x7x1xf32) <- (1x8x7x8x7x1xf32) + transpose_146 = paddle._C_ops.transpose(reshape_216, [0, 1, 3, 2, 4, 5]) + del reshape_216 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_36 = [-1, 7, 7, 1] + + # pd_op.reshape: (64x7x7x1xf32) <- (1x8x8x7x7x1xf32, 4xi64) + reshape_217 = paddle._C_ops.reshape(transpose_146, full_int_array_36) + del transpose_146 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_37 = [-1, 49] + + # pd_op.reshape: (64x49xf32) <- (64x7x7x1xf32, 2xi64) + reshape_218 = paddle._C_ops.reshape(reshape_217, full_int_array_37) + del reshape_217 + + # pd_op.unsqueeze: (64x1x49xf32) <- (64x49xf32, 1xi64) + unsqueeze_37 = paddle._C_ops.unsqueeze(reshape_218, full_int_array_1) + + # pd_op.unsqueeze: (64x49x1xf32) <- (64x49xf32, 1xi64) + unsqueeze_38 = paddle._C_ops.unsqueeze(reshape_218, full_int_array_2) + del reshape_218 + + # pd_op.subtract: (64x49x49xf32) <- (64x1x49xf32, 64x49x1xf32) + subtract_0 = paddle._C_ops.subtract(unsqueeze_37, unsqueeze_38) + del unsqueeze_37, unsqueeze_38 + + # pd_op.full: (xf32) <- () + full_26 = paddle._C_ops.full( + [], float("0"), paddle.float32, paddle.framework._current_expected_place() + ) + + # pd_op.not_equal: (64x49x49xb) <- (64x49x49xf32, xf32) + not_equal_0 = paddle._C_ops.not_equal(subtract_0, full_26) + + # pd_op.full: (64x49x49xf32) <- () + full_27 = paddle._C_ops.full( + [64, 49, 49], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (64x49x49xf32) <- (64x49x49xb, 64x49x49xf32, 64x49x49xf32) + where_0 = paddle._C_ops.where(not_equal_0, full_27, subtract_0) + del full_27, not_equal_0, subtract_0 + + # pd_op.equal: (64x49x49xb) <- (64x49x49xf32, xf32) + equal_0 = paddle._C_ops.equal(where_0, full_26) + + # pd_op.full: (64x49x49xf32) <- () + full_28 = paddle._C_ops.full( + [64, 49, 49], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (64x49x49xf32) <- (64x49x49xb, 64x49x49xf32, 64x49x49xf32) + where_1 = paddle._C_ops.where(equal_0, full_28, where_0) + del equal_0, full_28, where_0 + + # pd_op.matmul: (1024x49x384xf32) <- (1024x49x128xf32, 128x384xf32) + matmul_5 = paddle._C_ops.matmul(reshape_11, parameter_286, False, False) + del parameter_286 + + # pd_op.add: (1024x49x384xf32) <- (1024x49x384xf32, 384xf32) + add_7 = paddle._C_ops.add(matmul_5, parameter_285) + del parameter_285 + + # pd_op.reshape: (1024x49x3x4x32xf32) <- (1024x49x384xf32, 5xi64) + reshape_219 = paddle._C_ops.reshape(add_7, full_int_array_14) + del full_int_array_14 + + # pd_op.transpose: (3x1024x4x49x32xf32) <- (1024x49x3x4x32xf32) + transpose_8 = paddle._C_ops.transpose(reshape_219, [2, 0, 3, 1, 4]) + del reshape_219 + + # pd_op.slice: (1024x4x49x32xf32) <- (3x1024x4x49x32xf32, 1xi64, 1xi64) + slice_26 = paddle._C_ops.slice( + transpose_8, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (1024x4x49x32xf32) <- (3x1024x4x49x32xf32, 1xi64, 1xi64) + slice_27 = paddle._C_ops.slice( + transpose_8, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (1024x4x49x32xf32) <- (3x1024x4x49x32xf32, 1xi64, 1xi64) + slice_1 = paddle._C_ops.slice( + transpose_8, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (1024x4x49x32xf32) <- (1024x4x49x32xf32, 1xf32) + scale_1 = paddle._C_ops.scale(slice_26, full_0, float("0"), True) + del slice_26 + + # pd_op.transpose: (1024x4x32x49xf32) <- (1024x4x49x32xf32) + transpose_9 = paddle._C_ops.transpose(slice_27, [0, 1, 3, 2]) + del slice_27 + + # pd_op.matmul: (1024x4x49x49xf32) <- (1024x4x49x32xf32, 1024x4x32x49xf32) + matmul_6 = paddle._C_ops.matmul(scale_1, transpose_9, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_12 = paddle._C_ops.reshape(data_43, full_int_array_15) + del data_43 + + # pd_op.index_select: (2401x4xf32) <- (169x4xf32, 2401xi64) + index_select_1 = paddle._C_ops.index_select(data_46, reshape_12, 0) + del data_46 + + # pd_op.reshape: (49x49x4xf32) <- (2401x4xf32, 3xi64) + reshape_220 = paddle._C_ops.reshape(index_select_1, full_int_array_16) + + # pd_op.transpose: (4x49x49xf32) <- (49x49x4xf32) + transpose_10 = paddle._C_ops.transpose(reshape_220, [2, 0, 1]) + del reshape_220 + + # pd_op.unsqueeze: (1x4x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_1 = paddle._C_ops.unsqueeze(transpose_10, full_int_array_0) + + # pd_op.add: (1024x4x49x49xf32) <- (1024x4x49x49xf32, 1x4x49x49xf32) + add_8 = paddle._C_ops.add(matmul_6, unsqueeze_1) + + # pd_op.full_int_array: (5xi64) <- () + full_int_array_38 = [16, 64, 4, 49, 49] + + # pd_op.reshape: (16x64x4x49x49xf32) <- (1024x4x49x49xf32, 5xi64) + reshape_13 = paddle._C_ops.reshape(add_8, full_int_array_38) + del full_int_array_38 + + # pd_op.unsqueeze: (64x1x49x49xf32) <- (64x49x49xf32, 1xi64) + unsqueeze_39 = paddle._C_ops.unsqueeze(where_1, full_int_array_1) + del where_1 + + # pd_op.unsqueeze: (1x64x1x49x49xf32) <- (64x1x49x49xf32, 1xi64) + unsqueeze_2 = paddle._C_ops.unsqueeze(unsqueeze_39, full_int_array_0) + del unsqueeze_39 + + # pd_op.add: (16x64x4x49x49xf32) <- (16x64x4x49x49xf32, 1x64x1x49x49xf32) + add_9 = paddle._C_ops.add(reshape_13, unsqueeze_2) + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_39 = [1024, 4, 49, 49] + + # pd_op.reshape: (1024x4x49x49xf32) <- (16x64x4x49x49xf32, 4xi64) + reshape_221 = paddle._C_ops.reshape(add_9, full_int_array_39) + del full_int_array_39 + + # pd_op.softmax: (1024x4x49x49xf32) <- (1024x4x49x49xf32) + softmax_1 = paddle._C_ops.softmax(reshape_221, -1) + del reshape_221 + + # pd_op.matmul: (1024x4x49x32xf32) <- (1024x4x49x49xf32, 1024x4x49x32xf32) + matmul_125 = paddle._C_ops.matmul(softmax_1, slice_1, False, False) + + # pd_op.transpose: (1024x49x4x32xf32) <- (1024x4x49x32xf32) + transpose_11 = paddle._C_ops.transpose(matmul_125, [0, 2, 1, 3]) + del matmul_125 + + # pd_op.reshape: (1024x49x128xf32) <- (1024x49x4x32xf32, 3xi64) + reshape_14 = paddle._C_ops.reshape(transpose_11, full_int_array_17) + del full_int_array_17 + + # pd_op.matmul: (1024x49x128xf32) <- (1024x49x128xf32, 128x128xf32) + matmul_7 = paddle._C_ops.matmul(reshape_14, parameter_284, False, False) + del parameter_284 + + # pd_op.add: (1024x49x128xf32) <- (1024x49x128xf32, 128xf32) + add_10 = paddle._C_ops.add(matmul_7, parameter_283) + del parameter_283 + + # pd_op.reshape: (1024x7x7x128xf32) <- (1024x49x128xf32, 4xi64) + reshape_15 = paddle._C_ops.reshape(add_10, full_int_array_12) + del full_int_array_12 + + # pd_op.reshape: (16x8x8x7x7x128xf32) <- (1024x7x7x128xf32, 6xi64) + reshape_222 = paddle._C_ops.reshape(reshape_15, full_int_array_18) + del full_int_array_18 + + # pd_op.transpose: (16x8x7x8x7x128xf32) <- (16x8x8x7x7x128xf32) + transpose_12 = paddle._C_ops.transpose(reshape_222, [0, 1, 3, 2, 4, 5]) + del reshape_222 + + # pd_op.reshape: (16x56x56x128xf32) <- (16x8x7x8x7x128xf32, 4xi64) + reshape_16 = paddle._C_ops.reshape(transpose_12, full_int_array_19) + del full_int_array_19 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_5 = [3, 3] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_265 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_232 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_212 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_192 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_172 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_152 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_132 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_112 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_92 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_72 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_39 = full_int_array_5 + + # pd_op.roll: (16x56x56x128xf32) <- (16x56x56x128xf32, 2xi64) + roll_1 = paddle._C_ops.roll(reshape_16, full_int_array_5, [1, 2]) + + # pd_op.reshape: (16x3136x128xf32) <- (16x56x56x128xf32, 3xi64) + reshape_17 = paddle._C_ops.reshape(roll_1, full_int_array_20) + del full_int_array_20 + + # pd_op.full: (xf32) <- () + full_1 = paddle._C_ops.full( + [], + float("0.978261"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_11 = full_1 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_40 = [16, 1, 1] + + # pd_op.full: (1xf32) <- () + full_29 = paddle._C_ops.full( + [1], float("0"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.full: (1xf32) <- () + full_30 = paddle._C_ops.full( + [1], float("1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.uniform: (16x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_0 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (16x1x1xf32) <- (xf32, 16x1x1xf32) + add_171 = paddle._C_ops.add(full_1, uniform_0) + del uniform_0 + + # pd_op.floor: (16x1x1xf32) <- (16x1x1xf32) + floor_0 = paddle._C_ops.floor(add_171) + del add_171 + + # pd_op.divide: (16x3136x128xf32) <- (16x3136x128xf32, xf32) + divide_0 = paddle._C_ops.divide(reshape_17, full_1) + + # pd_op.multiply: (16x3136x128xf32) <- (16x3136x128xf32, 16x1x1xf32) + multiply_0 = paddle._C_ops.multiply(divide_0, floor_0) + + # pd_op.add: (16x3136x128xf32) <- (16x3136x128xf32, 16x3136x128xf32) + add_11 = paddle._C_ops.add(add_6, multiply_0) + + # pd_op.layer_norm: (16x3136x128xf32, 16x3136xf32, 16x3136xf32) <- (16x3136x128xf32, 128xf32, 128xf32) + layer_norm_12, layer_norm_13, layer_norm_14 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_11, parameter_282, parameter_281, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_281, parameter_282 + + # pd_op.matmul: (16x3136x512xf32) <- (16x3136x128xf32, 128x512xf32) + matmul_8 = paddle._C_ops.matmul(layer_norm_12, parameter_280, False, False) + del parameter_280 + + # pd_op.add: (16x3136x512xf32) <- (16x3136x512xf32, 512xf32) + add_12 = paddle._C_ops.add(matmul_8, parameter_279) + del parameter_279 + + # pd_op.gelu: (16x3136x512xf32) <- (16x3136x512xf32) + gelu_1 = paddle._C_ops.gelu(add_12, False) + + # pd_op.matmul: (16x3136x128xf32) <- (16x3136x512xf32, 512x128xf32) + matmul_9 = paddle._C_ops.matmul(gelu_1, parameter_278, False, False) + del parameter_278 + + # pd_op.add: (16x3136x128xf32) <- (16x3136x128xf32, 128xf32) + add_13 = paddle._C_ops.add(matmul_9, parameter_277) + del parameter_277 + + # pd_op.uniform: (16x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_1 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (16x1x1xf32) <- (xf32, 16x1x1xf32) + add_172 = paddle._C_ops.add(full_1, uniform_1) + del uniform_1 + + # pd_op.floor: (16x1x1xf32) <- (16x1x1xf32) + floor_1 = paddle._C_ops.floor(add_172) + del add_172 + + # pd_op.divide: (16x3136x128xf32) <- (16x3136x128xf32, xf32) + divide_1 = paddle._C_ops.divide(add_13, full_1) + + # pd_op.multiply: (16x3136x128xf32) <- (16x3136x128xf32, 16x1x1xf32) + multiply_1 = paddle._C_ops.multiply(divide_1, floor_1) + + # pd_op.add: (16x3136x128xf32) <- (16x3136x128xf32, 16x3136x128xf32) + add_14 = paddle._C_ops.add(add_11, multiply_1) + + # pd_op.reshape: (16x56x56x128xf32) <- (16x3136x128xf32, 4xi64) + reshape_18 = paddle._C_ops.reshape(add_14, full_int_array_10) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_6 = [2, 2] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_245 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_242 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_239 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_236 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_52 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_49 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_46 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_43 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_20 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_17 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_15 = full_int_array_6 + + # pd_op.strided_slice: (16x28x28x128xf32) <- (16x56x56x128xf32, 2xi64, 2xi64, 2xi64) + strided_slice_0 = paddle._C_ops.strided_slice( + reshape_18, [1, 2], full_int_array_21, full_int_array_34, full_int_array_6 + ) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_7 = [1, 0] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_237 = full_int_array_7 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_44 = full_int_array_7 + + # pd_op.strided_slice: (16x28x28x128xf32) <- (16x56x56x128xf32, 2xi64, 2xi64, 2xi64) + strided_slice_1 = paddle._C_ops.strided_slice( + reshape_18, [1, 2], full_int_array_7, full_int_array_34, full_int_array_6 + ) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_8 = [0, 1] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_240 = full_int_array_8 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_47 = full_int_array_8 + + # pd_op.strided_slice: (16x28x28x128xf32) <- (16x56x56x128xf32, 2xi64, 2xi64, 2xi64) + strided_slice_2 = paddle._C_ops.strided_slice( + reshape_18, [1, 2], full_int_array_8, full_int_array_34, full_int_array_6 + ) + + # pd_op.strided_slice: (16x28x28x128xf32) <- (16x56x56x128xf32, 2xi64, 2xi64, 2xi64) + strided_slice_3 = paddle._C_ops.strided_slice( + reshape_18, [1, 2], full_int_array_23, full_int_array_34, full_int_array_6 + ) + + # pd_op.reshape: (16x56x56x128xf32) <- (16x56x56x128xf32, 4xi64) + reshape_223 = paddle._C_ops.reshape(reshape_18, full_int_array_10) + del full_int_array_10 + + # pd_op.full: (1xi32) <- () + full_2 = paddle._C_ops.full( + [1], float("-1"), paddle.int32, paddle.core.CPUPlace() + ) + + # pd_op.assign: (1xi32) <- (1xi32) + assign_246 = full_2 + + # pd_op.assign: (1xi32) <- (1xi32) + assign_53 = full_2 + + # builtin.combine: ([16x28x28x128xf32, 16x28x28x128xf32, 16x28x28x128xf32, 16x28x28x128xf32]) <- (16x28x28x128xf32, 16x28x28x128xf32, 16x28x28x128xf32, 16x28x28x128xf32) + combine_0 = [strided_slice_0, strided_slice_1, strided_slice_2, strided_slice_3] + + # pd_op.concat: (16x28x28x512xf32) <- ([16x28x28x128xf32, 16x28x28x128xf32, 16x28x28x128xf32, 16x28x28x128xf32], 1xi32) + concat_0 = paddle._C_ops.concat(combine_0, full_2) + del combine_0 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_41 = [16, -1, 512] + + # pd_op.reshape: (16x784x512xf32) <- (16x28x28x512xf32, 3xi64) + reshape_19 = paddle._C_ops.reshape(concat_0, full_int_array_41) + del full_int_array_41 + + # pd_op.layer_norm: (16x784x512xf32, 16x784xf32, 16x784xf32) <- (16x784x512xf32, 512xf32, 512xf32) + layer_norm_15, layer_norm_16, layer_norm_17 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + reshape_19, parameter_276, parameter_275, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_275, parameter_276 + + # pd_op.matmul: (16x784x256xf32) <- (16x784x512xf32, 512x256xf32) + matmul_10 = paddle._C_ops.matmul(layer_norm_15, parameter_274, False, False) + del parameter_274 + + # pd_op.layer_norm: (16x784x256xf32, 16x784xf32, 16x784xf32) <- (16x784x256xf32, 256xf32, 256xf32) + layer_norm_18, layer_norm_19, layer_norm_20 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + matmul_10, parameter_273, parameter_272, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_272, parameter_273 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_42 = [16, 28, 28, 256] + + # pd_op.reshape: (16x28x28x256xf32) <- (16x784x256xf32, 4xi64) + reshape_20 = paddle._C_ops.reshape(layer_norm_18, full_int_array_42) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_43 = [16, 4, 7, 4, 7, 256] + + # pd_op.reshape: (16x4x7x4x7x256xf32) <- (16x28x28x256xf32, 6xi64) + reshape_224 = paddle._C_ops.reshape(reshape_20, full_int_array_43) + + # pd_op.transpose: (16x4x4x7x7x256xf32) <- (16x4x7x4x7x256xf32) + transpose_13 = paddle._C_ops.transpose(reshape_224, [0, 1, 3, 2, 4, 5]) + del reshape_224 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_44 = [-1, 7, 7, 256] + + # pd_op.reshape: (256x7x7x256xf32) <- (16x4x4x7x7x256xf32, 4xi64) + reshape_21 = paddle._C_ops.reshape(transpose_13, full_int_array_44) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_45 = [-1, 49, 256] + + # pd_op.reshape: (256x49x256xf32) <- (256x7x7x256xf32, 3xi64) + reshape_22 = paddle._C_ops.reshape(reshape_21, full_int_array_45) + + # pd_op.matmul: (256x49x768xf32) <- (256x49x256xf32, 256x768xf32) + matmul_11 = paddle._C_ops.matmul(reshape_22, parameter_271, False, False) + del parameter_271 + + # pd_op.add: (256x49x768xf32) <- (256x49x768xf32, 768xf32) + add_15 = paddle._C_ops.add(matmul_11, parameter_270) + del parameter_270 + + # pd_op.full_int_array: (5xi64) <- () + full_int_array_46 = [256, 49, 3, 8, 32] + + # pd_op.reshape: (256x49x3x8x32xf32) <- (256x49x768xf32, 5xi64) + reshape_225 = paddle._C_ops.reshape(add_15, full_int_array_46) + + # pd_op.transpose: (3x256x8x49x32xf32) <- (256x49x3x8x32xf32) + transpose_14 = paddle._C_ops.transpose(reshape_225, [2, 0, 3, 1, 4]) + del reshape_225 + + # pd_op.slice: (256x8x49x32xf32) <- (3x256x8x49x32xf32, 1xi64, 1xi64) + slice_28 = paddle._C_ops.slice( + transpose_14, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (256x8x49x32xf32) <- (3x256x8x49x32xf32, 1xi64, 1xi64) + slice_29 = paddle._C_ops.slice( + transpose_14, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (256x8x49x32xf32) <- (3x256x8x49x32xf32, 1xi64, 1xi64) + slice_2 = paddle._C_ops.slice( + transpose_14, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (256x8x49x32xf32) <- (256x8x49x32xf32, 1xf32) + scale_2 = paddle._C_ops.scale(slice_28, full_0, float("0"), True) + del slice_28 + + # pd_op.transpose: (256x8x32x49xf32) <- (256x8x49x32xf32) + transpose_15 = paddle._C_ops.transpose(slice_29, [0, 1, 3, 2]) + del slice_29 + + # pd_op.matmul: (256x8x49x49xf32) <- (256x8x49x32xf32, 256x8x32x49xf32) + matmul_12 = paddle._C_ops.matmul(scale_2, transpose_15, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_23 = paddle._C_ops.reshape(data_17, full_int_array_15) + del data_17 + + # pd_op.index_select: (2401x8xf32) <- (169x8xf32, 2401xi64) + index_select_2 = paddle._C_ops.index_select(data_20, reshape_23, 0) + del data_20 + + # pd_op.reshape: (49x49x8xf32) <- (2401x8xf32, 3xi64) + reshape_226 = paddle._C_ops.reshape(index_select_2, full_int_array_16) + + # pd_op.transpose: (8x49x49xf32) <- (49x49x8xf32) + transpose_16 = paddle._C_ops.transpose(reshape_226, [2, 0, 1]) + del reshape_226 + + # pd_op.unsqueeze: (1x8x49x49xf32) <- (8x49x49xf32, 1xi64) + unsqueeze_3 = paddle._C_ops.unsqueeze(transpose_16, full_int_array_0) + + # pd_op.add: (256x8x49x49xf32) <- (256x8x49x49xf32, 1x8x49x49xf32) + add_173 = paddle._C_ops.add(matmul_12, unsqueeze_3) + + # pd_op.softmax: (256x8x49x49xf32) <- (256x8x49x49xf32) + softmax_2 = paddle._C_ops.softmax(add_173, -1) + del add_173 + + # pd_op.matmul: (256x8x49x32xf32) <- (256x8x49x49xf32, 256x8x49x32xf32) + matmul_126 = paddle._C_ops.matmul(softmax_2, slice_2, False, False) + + # pd_op.transpose: (256x49x8x32xf32) <- (256x8x49x32xf32) + transpose_17 = paddle._C_ops.transpose(matmul_126, [0, 2, 1, 3]) + del matmul_126 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_47 = [256, 49, 256] + + # pd_op.reshape: (256x49x256xf32) <- (256x49x8x32xf32, 3xi64) + reshape_24 = paddle._C_ops.reshape(transpose_17, full_int_array_47) + + # pd_op.matmul: (256x49x256xf32) <- (256x49x256xf32, 256x256xf32) + matmul_13 = paddle._C_ops.matmul(reshape_24, parameter_269, False, False) + del parameter_269 + + # pd_op.add: (256x49x256xf32) <- (256x49x256xf32, 256xf32) + add_16 = paddle._C_ops.add(matmul_13, parameter_268) + del parameter_268 + + # pd_op.reshape: (256x7x7x256xf32) <- (256x49x256xf32, 4xi64) + reshape_25 = paddle._C_ops.reshape(add_16, full_int_array_44) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_48 = [-1, 4, 4, 7, 7, 256] + + # pd_op.reshape: (16x4x4x7x7x256xf32) <- (256x7x7x256xf32, 6xi64) + reshape_227 = paddle._C_ops.reshape(reshape_25, full_int_array_48) + + # pd_op.transpose: (16x4x7x4x7x256xf32) <- (16x4x4x7x7x256xf32) + transpose_18 = paddle._C_ops.transpose(reshape_227, [0, 1, 3, 2, 4, 5]) + del reshape_227 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_49 = [-1, 28, 28, 256] + + # pd_op.reshape: (16x28x28x256xf32) <- (16x4x7x4x7x256xf32, 4xi64) + reshape_26 = paddle._C_ops.reshape(transpose_18, full_int_array_49) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_50 = [16, 784, 256] + + # pd_op.reshape: (16x784x256xf32) <- (16x28x28x256xf32, 3xi64) + reshape_27 = paddle._C_ops.reshape(reshape_26, full_int_array_50) + + # pd_op.full: (xf32) <- () + full_3 = paddle._C_ops.full( + [], + float("0.956522"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_29 = full_3 + + # pd_op.uniform: (16x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_2 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (16x1x1xf32) <- (xf32, 16x1x1xf32) + add_174 = paddle._C_ops.add(full_3, uniform_2) + del uniform_2 + + # pd_op.floor: (16x1x1xf32) <- (16x1x1xf32) + floor_2 = paddle._C_ops.floor(add_174) + del add_174 + + # pd_op.divide: (16x784x256xf32) <- (16x784x256xf32, xf32) + divide_2 = paddle._C_ops.divide(reshape_27, full_3) + + # pd_op.multiply: (16x784x256xf32) <- (16x784x256xf32, 16x1x1xf32) + multiply_2 = paddle._C_ops.multiply(divide_2, floor_2) + + # pd_op.add: (16x784x256xf32) <- (16x784x256xf32, 16x784x256xf32) + add_17 = paddle._C_ops.add(matmul_10, multiply_2) + + # pd_op.layer_norm: (16x784x256xf32, 16x784xf32, 16x784xf32) <- (16x784x256xf32, 256xf32, 256xf32) + layer_norm_21, layer_norm_22, layer_norm_23 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_17, parameter_267, parameter_266, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_266, parameter_267 + + # pd_op.matmul: (16x784x1024xf32) <- (16x784x256xf32, 256x1024xf32) + matmul_14 = paddle._C_ops.matmul(layer_norm_21, parameter_265, False, False) + del parameter_265 + + # pd_op.add: (16x784x1024xf32) <- (16x784x1024xf32, 1024xf32) + add_18 = paddle._C_ops.add(matmul_14, parameter_264) + del parameter_264 + + # pd_op.gelu: (16x784x1024xf32) <- (16x784x1024xf32) + gelu_2 = paddle._C_ops.gelu(add_18, False) + + # pd_op.matmul: (16x784x256xf32) <- (16x784x1024xf32, 1024x256xf32) + matmul_15 = paddle._C_ops.matmul(gelu_2, parameter_263, False, False) + del parameter_263 + + # pd_op.add: (16x784x256xf32) <- (16x784x256xf32, 256xf32) + add_19 = paddle._C_ops.add(matmul_15, parameter_262) + del parameter_262 + + # pd_op.uniform: (16x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_3 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (16x1x1xf32) <- (xf32, 16x1x1xf32) + add_175 = paddle._C_ops.add(full_3, uniform_3) + del uniform_3 + + # pd_op.floor: (16x1x1xf32) <- (16x1x1xf32) + floor_3 = paddle._C_ops.floor(add_175) + del add_175 + + # pd_op.divide: (16x784x256xf32) <- (16x784x256xf32, xf32) + divide_3 = paddle._C_ops.divide(add_19, full_3) + + # pd_op.multiply: (16x784x256xf32) <- (16x784x256xf32, 16x1x1xf32) + multiply_3 = paddle._C_ops.multiply(divide_3, floor_3) + + # pd_op.add: (16x784x256xf32) <- (16x784x256xf32, 16x784x256xf32) + add_20 = paddle._C_ops.add(add_17, multiply_3) + + # pd_op.layer_norm: (16x784x256xf32, 16x784xf32, 16x784xf32) <- (16x784x256xf32, 256xf32, 256xf32) + layer_norm_24, layer_norm_25, layer_norm_26 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_20, parameter_261, parameter_260, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_260, parameter_261 + + # pd_op.reshape: (16x28x28x256xf32) <- (16x784x256xf32, 4xi64) + reshape_28 = paddle._C_ops.reshape(layer_norm_24, full_int_array_42) + + # pd_op.roll: (16x28x28x256xf32) <- (16x28x28x256xf32, 2xi64) + roll_2 = paddle._C_ops.roll(reshape_28, full_int_array_4, [1, 2]) + + # pd_op.reshape: (16x4x7x4x7x256xf32) <- (16x28x28x256xf32, 6xi64) + reshape_228 = paddle._C_ops.reshape(roll_2, full_int_array_43) + del full_int_array_43 + + # pd_op.transpose: (16x4x4x7x7x256xf32) <- (16x4x7x4x7x256xf32) + transpose_19 = paddle._C_ops.transpose(reshape_228, [0, 1, 3, 2, 4, 5]) + del reshape_228 + + # pd_op.reshape: (256x7x7x256xf32) <- (16x4x4x7x7x256xf32, 4xi64) + reshape_29 = paddle._C_ops.reshape(transpose_19, full_int_array_44) + + # pd_op.reshape: (256x49x256xf32) <- (256x7x7x256xf32, 3xi64) + reshape_30 = paddle._C_ops.reshape(reshape_29, full_int_array_45) + del full_int_array_45 + + # pd_op.full: (1x28x28x1xf32) <- () + full_31 = paddle._C_ops.full( + [1, 28, 28, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__20 = paddle._C_ops.set_value_( + full_31, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_31 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__21 = paddle._C_ops.set_value_( + set_value__20, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__20 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__22 = paddle._C_ops.set_value_( + set_value__21, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__21 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__23 = paddle._C_ops.set_value_( + set_value__22, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__22 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__24 = paddle._C_ops.set_value_( + set_value__23, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__23 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__25 = paddle._C_ops.set_value_( + set_value__24, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__24 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__26 = paddle._C_ops.set_value_( + set_value__25, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__25 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__27 = paddle._C_ops.set_value_( + set_value__26, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__26 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__1 = paddle._C_ops.set_value_( + set_value__27, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__27 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_51 = [1, 4, 7, 4, 7, 1] + + # pd_op.reshape: (1x4x7x4x7x1xf32) <- (1x28x28x1xf32, 6xi64) + reshape_229 = paddle._C_ops.reshape(set_value__1, full_int_array_51) + del full_int_array_51 + + # pd_op.transpose: (1x4x4x7x7x1xf32) <- (1x4x7x4x7x1xf32) + transpose_147 = paddle._C_ops.transpose(reshape_229, [0, 1, 3, 2, 4, 5]) + del reshape_229 + + # pd_op.reshape: (16x7x7x1xf32) <- (1x4x4x7x7x1xf32, 4xi64) + reshape_230 = paddle._C_ops.reshape(transpose_147, full_int_array_36) + del transpose_147 + + # pd_op.reshape: (16x49xf32) <- (16x7x7x1xf32, 2xi64) + reshape_231 = paddle._C_ops.reshape(reshape_230, full_int_array_37) + del reshape_230 + + # pd_op.unsqueeze: (16x1x49xf32) <- (16x49xf32, 1xi64) + unsqueeze_40 = paddle._C_ops.unsqueeze(reshape_231, full_int_array_1) + + # pd_op.unsqueeze: (16x49x1xf32) <- (16x49xf32, 1xi64) + unsqueeze_41 = paddle._C_ops.unsqueeze(reshape_231, full_int_array_2) + del reshape_231 + + # pd_op.subtract: (16x49x49xf32) <- (16x1x49xf32, 16x49x1xf32) + subtract_1 = paddle._C_ops.subtract(unsqueeze_40, unsqueeze_41) + del unsqueeze_40, unsqueeze_41 + + # pd_op.not_equal: (16x49x49xb) <- (16x49x49xf32, xf32) + not_equal_1 = paddle._C_ops.not_equal(subtract_1, full_26) + + # pd_op.full: (16x49x49xf32) <- () + full_32 = paddle._C_ops.full( + [16, 49, 49], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (16x49x49xf32) <- (16x49x49xb, 16x49x49xf32, 16x49x49xf32) + where_2 = paddle._C_ops.where(not_equal_1, full_32, subtract_1) + del full_32, not_equal_1, subtract_1 + + # pd_op.equal: (16x49x49xb) <- (16x49x49xf32, xf32) + equal_1 = paddle._C_ops.equal(where_2, full_26) + + # pd_op.full: (16x49x49xf32) <- () + full_33 = paddle._C_ops.full( + [16, 49, 49], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (16x49x49xf32) <- (16x49x49xb, 16x49x49xf32, 16x49x49xf32) + where_3 = paddle._C_ops.where(equal_1, full_33, where_2) + del equal_1, full_33, where_2 + + # pd_op.matmul: (256x49x768xf32) <- (256x49x256xf32, 256x768xf32) + matmul_16 = paddle._C_ops.matmul(reshape_30, parameter_259, False, False) + del parameter_259 + + # pd_op.add: (256x49x768xf32) <- (256x49x768xf32, 768xf32) + add_21 = paddle._C_ops.add(matmul_16, parameter_258) + del parameter_258 + + # pd_op.reshape: (256x49x3x8x32xf32) <- (256x49x768xf32, 5xi64) + reshape_232 = paddle._C_ops.reshape(add_21, full_int_array_46) + del full_int_array_46 + + # pd_op.transpose: (3x256x8x49x32xf32) <- (256x49x3x8x32xf32) + transpose_20 = paddle._C_ops.transpose(reshape_232, [2, 0, 3, 1, 4]) + del reshape_232 + + # pd_op.slice: (256x8x49x32xf32) <- (3x256x8x49x32xf32, 1xi64, 1xi64) + slice_30 = paddle._C_ops.slice( + transpose_20, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (256x8x49x32xf32) <- (3x256x8x49x32xf32, 1xi64, 1xi64) + slice_31 = paddle._C_ops.slice( + transpose_20, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (256x8x49x32xf32) <- (3x256x8x49x32xf32, 1xi64, 1xi64) + slice_3 = paddle._C_ops.slice( + transpose_20, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (256x8x49x32xf32) <- (256x8x49x32xf32, 1xf32) + scale_3 = paddle._C_ops.scale(slice_30, full_0, float("0"), True) + del slice_30 + + # pd_op.transpose: (256x8x32x49xf32) <- (256x8x49x32xf32) + transpose_21 = paddle._C_ops.transpose(slice_31, [0, 1, 3, 2]) + del slice_31 + + # pd_op.matmul: (256x8x49x49xf32) <- (256x8x49x32xf32, 256x8x32x49xf32) + matmul_17 = paddle._C_ops.matmul(scale_3, transpose_21, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_31 = paddle._C_ops.reshape(data_25, full_int_array_15) + del data_25 + + # pd_op.index_select: (2401x8xf32) <- (169x8xf32, 2401xi64) + index_select_3 = paddle._C_ops.index_select(data_26, reshape_31, 0) + del data_26 + + # pd_op.reshape: (49x49x8xf32) <- (2401x8xf32, 3xi64) + reshape_233 = paddle._C_ops.reshape(index_select_3, full_int_array_16) + + # pd_op.transpose: (8x49x49xf32) <- (49x49x8xf32) + transpose_22 = paddle._C_ops.transpose(reshape_233, [2, 0, 1]) + del reshape_233 + + # pd_op.unsqueeze: (1x8x49x49xf32) <- (8x49x49xf32, 1xi64) + unsqueeze_4 = paddle._C_ops.unsqueeze(transpose_22, full_int_array_0) + + # pd_op.add: (256x8x49x49xf32) <- (256x8x49x49xf32, 1x8x49x49xf32) + add_22 = paddle._C_ops.add(matmul_17, unsqueeze_4) + + # pd_op.full_int_array: (5xi64) <- () + full_int_array_52 = [16, 16, 8, 49, 49] + + # pd_op.reshape: (16x16x8x49x49xf32) <- (256x8x49x49xf32, 5xi64) + reshape_32 = paddle._C_ops.reshape(add_22, full_int_array_52) + del full_int_array_52 + + # pd_op.unsqueeze: (16x1x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_42 = paddle._C_ops.unsqueeze(where_3, full_int_array_1) + del where_3 + + # pd_op.unsqueeze: (1x16x1x49x49xf32) <- (16x1x49x49xf32, 1xi64) + unsqueeze_5 = paddle._C_ops.unsqueeze(unsqueeze_42, full_int_array_0) + del unsqueeze_42 + + # pd_op.add: (16x16x8x49x49xf32) <- (16x16x8x49x49xf32, 1x16x1x49x49xf32) + add_23 = paddle._C_ops.add(reshape_32, unsqueeze_5) + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_53 = [256, 8, 49, 49] + + # pd_op.reshape: (256x8x49x49xf32) <- (16x16x8x49x49xf32, 4xi64) + reshape_234 = paddle._C_ops.reshape(add_23, full_int_array_53) + del full_int_array_53 + + # pd_op.softmax: (256x8x49x49xf32) <- (256x8x49x49xf32) + softmax_3 = paddle._C_ops.softmax(reshape_234, -1) + del reshape_234 + + # pd_op.matmul: (256x8x49x32xf32) <- (256x8x49x49xf32, 256x8x49x32xf32) + matmul_127 = paddle._C_ops.matmul(softmax_3, slice_3, False, False) + + # pd_op.transpose: (256x49x8x32xf32) <- (256x8x49x32xf32) + transpose_23 = paddle._C_ops.transpose(matmul_127, [0, 2, 1, 3]) + del matmul_127 + + # pd_op.reshape: (256x49x256xf32) <- (256x49x8x32xf32, 3xi64) + reshape_33 = paddle._C_ops.reshape(transpose_23, full_int_array_47) + del full_int_array_47 + + # pd_op.matmul: (256x49x256xf32) <- (256x49x256xf32, 256x256xf32) + matmul_18 = paddle._C_ops.matmul(reshape_33, parameter_257, False, False) + del parameter_257 + + # pd_op.add: (256x49x256xf32) <- (256x49x256xf32, 256xf32) + add_24 = paddle._C_ops.add(matmul_18, parameter_256) + del parameter_256 + + # pd_op.reshape: (256x7x7x256xf32) <- (256x49x256xf32, 4xi64) + reshape_34 = paddle._C_ops.reshape(add_24, full_int_array_44) + del full_int_array_44 + + # pd_op.reshape: (16x4x4x7x7x256xf32) <- (256x7x7x256xf32, 6xi64) + reshape_235 = paddle._C_ops.reshape(reshape_34, full_int_array_48) + del full_int_array_48 + + # pd_op.transpose: (16x4x7x4x7x256xf32) <- (16x4x4x7x7x256xf32) + transpose_24 = paddle._C_ops.transpose(reshape_235, [0, 1, 3, 2, 4, 5]) + del reshape_235 + + # pd_op.reshape: (16x28x28x256xf32) <- (16x4x7x4x7x256xf32, 4xi64) + reshape_35 = paddle._C_ops.reshape(transpose_24, full_int_array_49) + del full_int_array_49 + + # pd_op.roll: (16x28x28x256xf32) <- (16x28x28x256xf32, 2xi64) + roll_3 = paddle._C_ops.roll(reshape_35, full_int_array_5, [1, 2]) + + # pd_op.reshape: (16x784x256xf32) <- (16x28x28x256xf32, 3xi64) + reshape_36 = paddle._C_ops.reshape(roll_3, full_int_array_50) + del full_int_array_50 + + # pd_op.full: (xf32) <- () + full_4 = paddle._C_ops.full( + [], + float("0.934783"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_40 = full_4 + + # pd_op.uniform: (16x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_4 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (16x1x1xf32) <- (xf32, 16x1x1xf32) + add_176 = paddle._C_ops.add(full_4, uniform_4) + del uniform_4 + + # pd_op.floor: (16x1x1xf32) <- (16x1x1xf32) + floor_4 = paddle._C_ops.floor(add_176) + del add_176 + + # pd_op.divide: (16x784x256xf32) <- (16x784x256xf32, xf32) + divide_4 = paddle._C_ops.divide(reshape_36, full_4) + + # pd_op.multiply: (16x784x256xf32) <- (16x784x256xf32, 16x1x1xf32) + multiply_4 = paddle._C_ops.multiply(divide_4, floor_4) + + # pd_op.add: (16x784x256xf32) <- (16x784x256xf32, 16x784x256xf32) + add_25 = paddle._C_ops.add(add_20, multiply_4) + + # pd_op.layer_norm: (16x784x256xf32, 16x784xf32, 16x784xf32) <- (16x784x256xf32, 256xf32, 256xf32) + layer_norm_27, layer_norm_28, layer_norm_29 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_25, parameter_255, parameter_254, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_254, parameter_255 + + # pd_op.matmul: (16x784x1024xf32) <- (16x784x256xf32, 256x1024xf32) + matmul_19 = paddle._C_ops.matmul(layer_norm_27, parameter_253, False, False) + del parameter_253 + + # pd_op.add: (16x784x1024xf32) <- (16x784x1024xf32, 1024xf32) + add_26 = paddle._C_ops.add(matmul_19, parameter_252) + del parameter_252 + + # pd_op.gelu: (16x784x1024xf32) <- (16x784x1024xf32) + gelu_3 = paddle._C_ops.gelu(add_26, False) + + # pd_op.matmul: (16x784x256xf32) <- (16x784x1024xf32, 1024x256xf32) + matmul_20 = paddle._C_ops.matmul(gelu_3, parameter_251, False, False) + del parameter_251 + + # pd_op.add: (16x784x256xf32) <- (16x784x256xf32, 256xf32) + add_27 = paddle._C_ops.add(matmul_20, parameter_250) + del parameter_250 + + # pd_op.uniform: (16x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_5 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (16x1x1xf32) <- (xf32, 16x1x1xf32) + add_177 = paddle._C_ops.add(full_4, uniform_5) + del uniform_5 + + # pd_op.floor: (16x1x1xf32) <- (16x1x1xf32) + floor_5 = paddle._C_ops.floor(add_177) + del add_177 + + # pd_op.divide: (16x784x256xf32) <- (16x784x256xf32, xf32) + divide_5 = paddle._C_ops.divide(add_27, full_4) + + # pd_op.multiply: (16x784x256xf32) <- (16x784x256xf32, 16x1x1xf32) + multiply_5 = paddle._C_ops.multiply(divide_5, floor_5) + + # pd_op.add: (16x784x256xf32) <- (16x784x256xf32, 16x784x256xf32) + add_28 = paddle._C_ops.add(add_25, multiply_5) + + # pd_op.reshape: (16x28x28x256xf32) <- (16x784x256xf32, 4xi64) + reshape_37 = paddle._C_ops.reshape(add_28, full_int_array_42) + + # pd_op.strided_slice: (16x14x14x256xf32) <- (16x28x28x256xf32, 2xi64, 2xi64, 2xi64) + strided_slice_4 = paddle._C_ops.strided_slice( + reshape_37, [1, 2], full_int_array_21, full_int_array_34, full_int_array_6 + ) + + # pd_op.strided_slice: (16x14x14x256xf32) <- (16x28x28x256xf32, 2xi64, 2xi64, 2xi64) + strided_slice_5 = paddle._C_ops.strided_slice( + reshape_37, [1, 2], full_int_array_7, full_int_array_34, full_int_array_6 + ) + + # pd_op.strided_slice: (16x14x14x256xf32) <- (16x28x28x256xf32, 2xi64, 2xi64, 2xi64) + strided_slice_6 = paddle._C_ops.strided_slice( + reshape_37, [1, 2], full_int_array_8, full_int_array_34, full_int_array_6 + ) + + # pd_op.strided_slice: (16x14x14x256xf32) <- (16x28x28x256xf32, 2xi64, 2xi64, 2xi64) + strided_slice_7 = paddle._C_ops.strided_slice( + reshape_37, [1, 2], full_int_array_23, full_int_array_34, full_int_array_6 + ) + + # pd_op.reshape: (16x28x28x256xf32) <- (16x28x28x256xf32, 4xi64) + reshape_236 = paddle._C_ops.reshape(reshape_37, full_int_array_42) + del full_int_array_42 + + # builtin.combine: ([16x14x14x256xf32, 16x14x14x256xf32, 16x14x14x256xf32, 16x14x14x256xf32]) <- (16x14x14x256xf32, 16x14x14x256xf32, 16x14x14x256xf32, 16x14x14x256xf32) + combine_1 = [strided_slice_4, strided_slice_5, strided_slice_6, strided_slice_7] + + # pd_op.concat: (16x14x14x1024xf32) <- ([16x14x14x256xf32, 16x14x14x256xf32, 16x14x14x256xf32, 16x14x14x256xf32], 1xi32) + concat_1 = paddle._C_ops.concat(combine_1, full_2) + del combine_1 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_54 = [16, -1, 1024] + + # pd_op.reshape: (16x196x1024xf32) <- (16x14x14x1024xf32, 3xi64) + reshape_38 = paddle._C_ops.reshape(concat_1, full_int_array_54) + del full_int_array_54 + + # pd_op.layer_norm: (16x196x1024xf32, 16x196xf32, 16x196xf32) <- (16x196x1024xf32, 1024xf32, 1024xf32) + layer_norm_30, layer_norm_31, layer_norm_32 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + reshape_38, parameter_249, parameter_248, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_248, parameter_249 + + # pd_op.matmul: (16x196x512xf32) <- (16x196x1024xf32, 1024x512xf32) + matmul_21 = paddle._C_ops.matmul(layer_norm_30, parameter_247, False, False) + del parameter_247 + + # pd_op.layer_norm: (16x196x512xf32, 16x196xf32, 16x196xf32) <- (16x196x512xf32, 512xf32, 512xf32) + layer_norm_33, layer_norm_34, layer_norm_35 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + matmul_21, parameter_246, parameter_245, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_245, parameter_246 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_55 = [16, 14, 14, 512] + + # pd_op.reshape: (16x14x14x512xf32) <- (16x196x512xf32, 4xi64) + reshape_39 = paddle._C_ops.reshape(layer_norm_33, full_int_array_55) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_56 = [16, 2, 7, 2, 7, 512] + + # pd_op.reshape: (16x2x7x2x7x512xf32) <- (16x14x14x512xf32, 6xi64) + reshape_237 = paddle._C_ops.reshape(reshape_39, full_int_array_56) + + # pd_op.transpose: (16x2x2x7x7x512xf32) <- (16x2x7x2x7x512xf32) + transpose_25 = paddle._C_ops.transpose(reshape_237, [0, 1, 3, 2, 4, 5]) + del reshape_237 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_57 = [-1, 7, 7, 512] + + # pd_op.reshape: (64x7x7x512xf32) <- (16x2x2x7x7x512xf32, 4xi64) + reshape_40 = paddle._C_ops.reshape(transpose_25, full_int_array_57) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_58 = [-1, 49, 512] + + # pd_op.reshape: (64x49x512xf32) <- (64x7x7x512xf32, 3xi64) + reshape_41 = paddle._C_ops.reshape(reshape_40, full_int_array_58) + + # pd_op.matmul: (64x49x1536xf32) <- (64x49x512xf32, 512x1536xf32) + matmul_22 = paddle._C_ops.matmul(reshape_41, parameter_244, False, False) + del parameter_244 + + # pd_op.add: (64x49x1536xf32) <- (64x49x1536xf32, 1536xf32) + add_29 = paddle._C_ops.add(matmul_22, parameter_243) + del parameter_243 + + # pd_op.full_int_array: (5xi64) <- () + full_int_array_59 = [64, 49, 3, 16, 32] + + # pd_op.reshape: (64x49x3x16x32xf32) <- (64x49x1536xf32, 5xi64) + reshape_238 = paddle._C_ops.reshape(add_29, full_int_array_59) + + # pd_op.transpose: (3x64x16x49x32xf32) <- (64x49x3x16x32xf32) + transpose_26 = paddle._C_ops.transpose(reshape_238, [2, 0, 3, 1, 4]) + del reshape_238 + + # pd_op.slice: (64x16x49x32xf32) <- (3x64x16x49x32xf32, 1xi64, 1xi64) + slice_32 = paddle._C_ops.slice( + transpose_26, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (64x16x49x32xf32) <- (3x64x16x49x32xf32, 1xi64, 1xi64) + slice_33 = paddle._C_ops.slice( + transpose_26, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (64x16x49x32xf32) <- (3x64x16x49x32xf32, 1xi64, 1xi64) + slice_4 = paddle._C_ops.slice( + transpose_26, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (64x16x49x32xf32) <- (64x16x49x32xf32, 1xf32) + scale_4 = paddle._C_ops.scale(slice_32, full_0, float("0"), True) + del slice_32 + + # pd_op.transpose: (64x16x32x49xf32) <- (64x16x49x32xf32) + transpose_27 = paddle._C_ops.transpose(slice_33, [0, 1, 3, 2]) + del slice_33 + + # pd_op.matmul: (64x16x49x49xf32) <- (64x16x49x32xf32, 64x16x32x49xf32) + matmul_23 = paddle._C_ops.matmul(scale_4, transpose_27, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_42 = paddle._C_ops.reshape(data_27, full_int_array_15) + del data_27 + + # pd_op.index_select: (2401x16xf32) <- (169x16xf32, 2401xi64) + index_select_4 = paddle._C_ops.index_select(data_28, reshape_42, 0) + del data_28 + + # pd_op.reshape: (49x49x16xf32) <- (2401x16xf32, 3xi64) + reshape_239 = paddle._C_ops.reshape(index_select_4, full_int_array_16) + + # pd_op.transpose: (16x49x49xf32) <- (49x49x16xf32) + transpose_28 = paddle._C_ops.transpose(reshape_239, [2, 0, 1]) + del reshape_239 + + # pd_op.unsqueeze: (1x16x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_6 = paddle._C_ops.unsqueeze(transpose_28, full_int_array_0) + + # pd_op.add: (64x16x49x49xf32) <- (64x16x49x49xf32, 1x16x49x49xf32) + add_178 = paddle._C_ops.add(matmul_23, unsqueeze_6) + + # pd_op.softmax: (64x16x49x49xf32) <- (64x16x49x49xf32) + softmax_4 = paddle._C_ops.softmax(add_178, -1) + del add_178 + + # pd_op.matmul: (64x16x49x32xf32) <- (64x16x49x49xf32, 64x16x49x32xf32) + matmul_128 = paddle._C_ops.matmul(softmax_4, slice_4, False, False) + + # pd_op.transpose: (64x49x16x32xf32) <- (64x16x49x32xf32) + transpose_29 = paddle._C_ops.transpose(matmul_128, [0, 2, 1, 3]) + del matmul_128 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_60 = [64, 49, 512] + + # pd_op.reshape: (64x49x512xf32) <- (64x49x16x32xf32, 3xi64) + reshape_43 = paddle._C_ops.reshape(transpose_29, full_int_array_60) + + # pd_op.matmul: (64x49x512xf32) <- (64x49x512xf32, 512x512xf32) + matmul_24 = paddle._C_ops.matmul(reshape_43, parameter_242, False, False) + del parameter_242 + + # pd_op.add: (64x49x512xf32) <- (64x49x512xf32, 512xf32) + add_30 = paddle._C_ops.add(matmul_24, parameter_241) + del parameter_241 + + # pd_op.reshape: (64x7x7x512xf32) <- (64x49x512xf32, 4xi64) + reshape_44 = paddle._C_ops.reshape(add_30, full_int_array_57) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_61 = [-1, 2, 2, 7, 7, 512] + + # pd_op.reshape: (16x2x2x7x7x512xf32) <- (64x7x7x512xf32, 6xi64) + reshape_240 = paddle._C_ops.reshape(reshape_44, full_int_array_61) + + # pd_op.transpose: (16x2x7x2x7x512xf32) <- (16x2x2x7x7x512xf32) + transpose_30 = paddle._C_ops.transpose(reshape_240, [0, 1, 3, 2, 4, 5]) + del reshape_240 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_62 = [-1, 14, 14, 512] + + # pd_op.reshape: (16x14x14x512xf32) <- (16x2x7x2x7x512xf32, 4xi64) + reshape_45 = paddle._C_ops.reshape(transpose_30, full_int_array_62) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_63 = [16, 196, 512] + + # pd_op.reshape: (16x196x512xf32) <- (16x14x14x512xf32, 3xi64) + reshape_46 = paddle._C_ops.reshape(reshape_45, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_5 = paddle._C_ops.full( + [], + float("0.913043"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_62 = full_5 + + # pd_op.uniform: (16x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_6 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (16x1x1xf32) <- (xf32, 16x1x1xf32) + add_179 = paddle._C_ops.add(full_5, uniform_6) + del uniform_6 + + # pd_op.floor: (16x1x1xf32) <- (16x1x1xf32) + floor_6 = paddle._C_ops.floor(add_179) + del add_179 + + # pd_op.divide: (16x196x512xf32) <- (16x196x512xf32, xf32) + divide_6 = paddle._C_ops.divide(reshape_46, full_5) + + # pd_op.multiply: (16x196x512xf32) <- (16x196x512xf32, 16x1x1xf32) + multiply_6 = paddle._C_ops.multiply(divide_6, floor_6) + + # pd_op.add: (16x196x512xf32) <- (16x196x512xf32, 16x196x512xf32) + add_31 = paddle._C_ops.add(matmul_21, multiply_6) + + # pd_op.layer_norm: (16x196x512xf32, 16x196xf32, 16x196xf32) <- (16x196x512xf32, 512xf32, 512xf32) + layer_norm_36, layer_norm_37, layer_norm_38 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_31, parameter_240, parameter_239, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_239, parameter_240 + + # pd_op.matmul: (16x196x2048xf32) <- (16x196x512xf32, 512x2048xf32) + matmul_25 = paddle._C_ops.matmul(layer_norm_36, parameter_238, False, False) + del parameter_238 + + # pd_op.add: (16x196x2048xf32) <- (16x196x2048xf32, 2048xf32) + add_32 = paddle._C_ops.add(matmul_25, parameter_237) + del parameter_237 + + # pd_op.gelu: (16x196x2048xf32) <- (16x196x2048xf32) + gelu_4 = paddle._C_ops.gelu(add_32, False) + + # pd_op.matmul: (16x196x512xf32) <- (16x196x2048xf32, 2048x512xf32) + matmul_26 = paddle._C_ops.matmul(gelu_4, parameter_236, False, False) + del parameter_236 + + # pd_op.add: (16x196x512xf32) <- (16x196x512xf32, 512xf32) + add_33 = paddle._C_ops.add(matmul_26, parameter_235) + del parameter_235 + + # pd_op.uniform: (16x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_7 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (16x1x1xf32) <- (xf32, 16x1x1xf32) + add_180 = paddle._C_ops.add(full_5, uniform_7) + del uniform_7 + + # pd_op.floor: (16x1x1xf32) <- (16x1x1xf32) + floor_7 = paddle._C_ops.floor(add_180) + del add_180 + + # pd_op.divide: (16x196x512xf32) <- (16x196x512xf32, xf32) + divide_7 = paddle._C_ops.divide(add_33, full_5) + + # pd_op.multiply: (16x196x512xf32) <- (16x196x512xf32, 16x1x1xf32) + multiply_7 = paddle._C_ops.multiply(divide_7, floor_7) + + # pd_op.add: (16x196x512xf32) <- (16x196x512xf32, 16x196x512xf32) + add_34 = paddle._C_ops.add(add_31, multiply_7) + + # pd_op.layer_norm: (16x196x512xf32, 16x196xf32, 16x196xf32) <- (16x196x512xf32, 512xf32, 512xf32) + layer_norm_39, layer_norm_40, layer_norm_41 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_34, parameter_234, parameter_233, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_233, parameter_234 + + # pd_op.reshape: (16x14x14x512xf32) <- (16x196x512xf32, 4xi64) + reshape_47 = paddle._C_ops.reshape(layer_norm_39, full_int_array_55) + + # pd_op.roll: (16x14x14x512xf32) <- (16x14x14x512xf32, 2xi64) + roll_4 = paddle._C_ops.roll(reshape_47, full_int_array_4, [1, 2]) + + # pd_op.reshape: (16x2x7x2x7x512xf32) <- (16x14x14x512xf32, 6xi64) + reshape_241 = paddle._C_ops.reshape(roll_4, full_int_array_56) + + # pd_op.transpose: (16x2x2x7x7x512xf32) <- (16x2x7x2x7x512xf32) + transpose_31 = paddle._C_ops.transpose(reshape_241, [0, 1, 3, 2, 4, 5]) + del reshape_241 + + # pd_op.reshape: (64x7x7x512xf32) <- (16x2x2x7x7x512xf32, 4xi64) + reshape_48 = paddle._C_ops.reshape(transpose_31, full_int_array_57) + + # pd_op.reshape: (64x49x512xf32) <- (64x7x7x512xf32, 3xi64) + reshape_49 = paddle._C_ops.reshape(reshape_48, full_int_array_58) + + # pd_op.full: (1x14x14x1xf32) <- () + full_34 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__28 = paddle._C_ops.set_value_( + full_34, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_34 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__29 = paddle._C_ops.set_value_( + set_value__28, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__28 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__30 = paddle._C_ops.set_value_( + set_value__29, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__29 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__31 = paddle._C_ops.set_value_( + set_value__30, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__30 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__32 = paddle._C_ops.set_value_( + set_value__31, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__31 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__33 = paddle._C_ops.set_value_( + set_value__32, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__32 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__34 = paddle._C_ops.set_value_( + set_value__33, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__33 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__35 = paddle._C_ops.set_value_( + set_value__34, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__34 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__2 = paddle._C_ops.set_value_( + set_value__35, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__35 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_64 = [1, 2, 7, 2, 7, 1] + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_242 = paddle._C_ops.reshape(set_value__2, full_int_array_64) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_148 = paddle._C_ops.transpose(reshape_242, [0, 1, 3, 2, 4, 5]) + del reshape_242 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_243 = paddle._C_ops.reshape(transpose_148, full_int_array_36) + del transpose_148 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_244 = paddle._C_ops.reshape(reshape_243, full_int_array_37) + del reshape_243 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_43 = paddle._C_ops.unsqueeze(reshape_244, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_44 = paddle._C_ops.unsqueeze(reshape_244, full_int_array_2) + del reshape_244 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_2 = paddle._C_ops.subtract(unsqueeze_43, unsqueeze_44) + del unsqueeze_43, unsqueeze_44 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_2 = paddle._C_ops.not_equal(subtract_2, full_26) + + # pd_op.full: (4x49x49xf32) <- () + full_35 = paddle._C_ops.full( + [4, 49, 49], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_4 = paddle._C_ops.where(not_equal_2, full_35, subtract_2) + del not_equal_2, subtract_2 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_2 = paddle._C_ops.equal(where_4, full_26) + + # pd_op.full: (4x49x49xf32) <- () + full_36 = paddle._C_ops.full( + [4, 49, 49], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_5 = paddle._C_ops.where(equal_2, full_36, where_4) + del equal_2, where_4 + + # pd_op.matmul: (64x49x1536xf32) <- (64x49x512xf32, 512x1536xf32) + matmul_27 = paddle._C_ops.matmul(reshape_49, parameter_232, False, False) + del parameter_232 + + # pd_op.add: (64x49x1536xf32) <- (64x49x1536xf32, 1536xf32) + add_35 = paddle._C_ops.add(matmul_27, parameter_231) + del parameter_231 + + # pd_op.reshape: (64x49x3x16x32xf32) <- (64x49x1536xf32, 5xi64) + reshape_245 = paddle._C_ops.reshape(add_35, full_int_array_59) + + # pd_op.transpose: (3x64x16x49x32xf32) <- (64x49x3x16x32xf32) + transpose_32 = paddle._C_ops.transpose(reshape_245, [2, 0, 3, 1, 4]) + del reshape_245 + + # pd_op.slice: (64x16x49x32xf32) <- (3x64x16x49x32xf32, 1xi64, 1xi64) + slice_34 = paddle._C_ops.slice( + transpose_32, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (64x16x49x32xf32) <- (3x64x16x49x32xf32, 1xi64, 1xi64) + slice_35 = paddle._C_ops.slice( + transpose_32, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (64x16x49x32xf32) <- (3x64x16x49x32xf32, 1xi64, 1xi64) + slice_5 = paddle._C_ops.slice( + transpose_32, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (64x16x49x32xf32) <- (64x16x49x32xf32, 1xf32) + scale_5 = paddle._C_ops.scale(slice_34, full_0, float("0"), True) + del slice_34 + + # pd_op.transpose: (64x16x32x49xf32) <- (64x16x49x32xf32) + transpose_33 = paddle._C_ops.transpose(slice_35, [0, 1, 3, 2]) + del slice_35 + + # pd_op.matmul: (64x16x49x49xf32) <- (64x16x49x32xf32, 64x16x32x49xf32) + matmul_28 = paddle._C_ops.matmul(scale_5, transpose_33, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_50 = paddle._C_ops.reshape(data_29, full_int_array_15) + del data_29 + + # pd_op.index_select: (2401x16xf32) <- (169x16xf32, 2401xi64) + index_select_5 = paddle._C_ops.index_select(data_30, reshape_50, 0) + del data_30 + + # pd_op.reshape: (49x49x16xf32) <- (2401x16xf32, 3xi64) + reshape_246 = paddle._C_ops.reshape(index_select_5, full_int_array_16) + + # pd_op.transpose: (16x49x49xf32) <- (49x49x16xf32) + transpose_34 = paddle._C_ops.transpose(reshape_246, [2, 0, 1]) + del reshape_246 + + # pd_op.unsqueeze: (1x16x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_7 = paddle._C_ops.unsqueeze(transpose_34, full_int_array_0) + + # pd_op.add: (64x16x49x49xf32) <- (64x16x49x49xf32, 1x16x49x49xf32) + add_36 = paddle._C_ops.add(matmul_28, unsqueeze_7) + + # pd_op.full_int_array: (5xi64) <- () + full_int_array_65 = [16, 4, 16, 49, 49] + + # pd_op.reshape: (16x4x16x49x49xf32) <- (64x16x49x49xf32, 5xi64) + reshape_51 = paddle._C_ops.reshape(add_36, full_int_array_65) + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_45 = paddle._C_ops.unsqueeze(where_5, full_int_array_1) + del where_5 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_8 = paddle._C_ops.unsqueeze(unsqueeze_45, full_int_array_0) + del unsqueeze_45 + + # pd_op.add: (16x4x16x49x49xf32) <- (16x4x16x49x49xf32, 1x4x1x49x49xf32) + add_37 = paddle._C_ops.add(reshape_51, unsqueeze_8) + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_66 = [64, 16, 49, 49] + + # pd_op.reshape: (64x16x49x49xf32) <- (16x4x16x49x49xf32, 4xi64) + reshape_247 = paddle._C_ops.reshape(add_37, full_int_array_66) + + # pd_op.softmax: (64x16x49x49xf32) <- (64x16x49x49xf32) + softmax_5 = paddle._C_ops.softmax(reshape_247, -1) + del reshape_247 + + # pd_op.matmul: (64x16x49x32xf32) <- (64x16x49x49xf32, 64x16x49x32xf32) + matmul_129 = paddle._C_ops.matmul(softmax_5, slice_5, False, False) + + # pd_op.transpose: (64x49x16x32xf32) <- (64x16x49x32xf32) + transpose_35 = paddle._C_ops.transpose(matmul_129, [0, 2, 1, 3]) + del matmul_129 + + # pd_op.reshape: (64x49x512xf32) <- (64x49x16x32xf32, 3xi64) + reshape_52 = paddle._C_ops.reshape(transpose_35, full_int_array_60) + + # pd_op.matmul: (64x49x512xf32) <- (64x49x512xf32, 512x512xf32) + matmul_29 = paddle._C_ops.matmul(reshape_52, parameter_230, False, False) + del parameter_230 + + # pd_op.add: (64x49x512xf32) <- (64x49x512xf32, 512xf32) + add_38 = paddle._C_ops.add(matmul_29, parameter_229) + del parameter_229 + + # pd_op.reshape: (64x7x7x512xf32) <- (64x49x512xf32, 4xi64) + reshape_53 = paddle._C_ops.reshape(add_38, full_int_array_57) + + # pd_op.reshape: (16x2x2x7x7x512xf32) <- (64x7x7x512xf32, 6xi64) + reshape_248 = paddle._C_ops.reshape(reshape_53, full_int_array_61) + + # pd_op.transpose: (16x2x7x2x7x512xf32) <- (16x2x2x7x7x512xf32) + transpose_36 = paddle._C_ops.transpose(reshape_248, [0, 1, 3, 2, 4, 5]) + del reshape_248 + + # pd_op.reshape: (16x14x14x512xf32) <- (16x2x7x2x7x512xf32, 4xi64) + reshape_54 = paddle._C_ops.reshape(transpose_36, full_int_array_62) + + # pd_op.roll: (16x14x14x512xf32) <- (16x14x14x512xf32, 2xi64) + roll_5 = paddle._C_ops.roll(reshape_54, full_int_array_5, [1, 2]) + + # pd_op.reshape: (16x196x512xf32) <- (16x14x14x512xf32, 3xi64) + reshape_55 = paddle._C_ops.reshape(roll_5, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_6 = paddle._C_ops.full( + [], + float("0.891304"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_73 = full_6 + + # pd_op.uniform: (16x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_8 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (16x1x1xf32) <- (xf32, 16x1x1xf32) + add_181 = paddle._C_ops.add(full_6, uniform_8) + del uniform_8 + + # pd_op.floor: (16x1x1xf32) <- (16x1x1xf32) + floor_8 = paddle._C_ops.floor(add_181) + del add_181 + + # pd_op.divide: (16x196x512xf32) <- (16x196x512xf32, xf32) + divide_8 = paddle._C_ops.divide(reshape_55, full_6) + + # pd_op.multiply: (16x196x512xf32) <- (16x196x512xf32, 16x1x1xf32) + multiply_8 = paddle._C_ops.multiply(divide_8, floor_8) + + # pd_op.add: (16x196x512xf32) <- (16x196x512xf32, 16x196x512xf32) + add_39 = paddle._C_ops.add(add_34, multiply_8) + + # pd_op.layer_norm: (16x196x512xf32, 16x196xf32, 16x196xf32) <- (16x196x512xf32, 512xf32, 512xf32) + layer_norm_42, layer_norm_43, layer_norm_44 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_39, parameter_228, parameter_227, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_227, parameter_228 + + # pd_op.matmul: (16x196x2048xf32) <- (16x196x512xf32, 512x2048xf32) + matmul_30 = paddle._C_ops.matmul(layer_norm_42, parameter_226, False, False) + del parameter_226 + + # pd_op.add: (16x196x2048xf32) <- (16x196x2048xf32, 2048xf32) + add_40 = paddle._C_ops.add(matmul_30, parameter_225) + del parameter_225 + + # pd_op.gelu: (16x196x2048xf32) <- (16x196x2048xf32) + gelu_5 = paddle._C_ops.gelu(add_40, False) + + # pd_op.matmul: (16x196x512xf32) <- (16x196x2048xf32, 2048x512xf32) + matmul_31 = paddle._C_ops.matmul(gelu_5, parameter_224, False, False) + del parameter_224 + + # pd_op.add: (16x196x512xf32) <- (16x196x512xf32, 512xf32) + add_41 = paddle._C_ops.add(matmul_31, parameter_223) + del parameter_223 + + # pd_op.uniform: (16x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_9 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (16x1x1xf32) <- (xf32, 16x1x1xf32) + add_182 = paddle._C_ops.add(full_6, uniform_9) + del uniform_9 + + # pd_op.floor: (16x1x1xf32) <- (16x1x1xf32) + floor_9 = paddle._C_ops.floor(add_182) + del add_182 + + # pd_op.divide: (16x196x512xf32) <- (16x196x512xf32, xf32) + divide_9 = paddle._C_ops.divide(add_41, full_6) + + # pd_op.multiply: (16x196x512xf32) <- (16x196x512xf32, 16x1x1xf32) + multiply_9 = paddle._C_ops.multiply(divide_9, floor_9) + + # pd_op.add: (16x196x512xf32) <- (16x196x512xf32, 16x196x512xf32) + add_42 = paddle._C_ops.add(add_39, multiply_9) + + # pd_op.layer_norm: (16x196x512xf32, 16x196xf32, 16x196xf32) <- (16x196x512xf32, 512xf32, 512xf32) + layer_norm_45, layer_norm_46, layer_norm_47 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_42, parameter_222, parameter_221, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_221, parameter_222 + + # pd_op.reshape: (16x14x14x512xf32) <- (16x196x512xf32, 4xi64) + reshape_56 = paddle._C_ops.reshape(layer_norm_45, full_int_array_55) + + # pd_op.reshape: (16x2x7x2x7x512xf32) <- (16x14x14x512xf32, 6xi64) + reshape_249 = paddle._C_ops.reshape(reshape_56, full_int_array_56) + + # pd_op.transpose: (16x2x2x7x7x512xf32) <- (16x2x7x2x7x512xf32) + transpose_37 = paddle._C_ops.transpose(reshape_249, [0, 1, 3, 2, 4, 5]) + del reshape_249 + + # pd_op.reshape: (64x7x7x512xf32) <- (16x2x2x7x7x512xf32, 4xi64) + reshape_57 = paddle._C_ops.reshape(transpose_37, full_int_array_57) + + # pd_op.reshape: (64x49x512xf32) <- (64x7x7x512xf32, 3xi64) + reshape_58 = paddle._C_ops.reshape(reshape_57, full_int_array_58) + + # pd_op.matmul: (64x49x1536xf32) <- (64x49x512xf32, 512x1536xf32) + matmul_32 = paddle._C_ops.matmul(reshape_58, parameter_220, False, False) + del parameter_220 + + # pd_op.add: (64x49x1536xf32) <- (64x49x1536xf32, 1536xf32) + add_43 = paddle._C_ops.add(matmul_32, parameter_219) + del parameter_219 + + # pd_op.reshape: (64x49x3x16x32xf32) <- (64x49x1536xf32, 5xi64) + reshape_250 = paddle._C_ops.reshape(add_43, full_int_array_59) + + # pd_op.transpose: (3x64x16x49x32xf32) <- (64x49x3x16x32xf32) + transpose_38 = paddle._C_ops.transpose(reshape_250, [2, 0, 3, 1, 4]) + del reshape_250 + + # pd_op.slice: (64x16x49x32xf32) <- (3x64x16x49x32xf32, 1xi64, 1xi64) + slice_36 = paddle._C_ops.slice( + transpose_38, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (64x16x49x32xf32) <- (3x64x16x49x32xf32, 1xi64, 1xi64) + slice_37 = paddle._C_ops.slice( + transpose_38, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (64x16x49x32xf32) <- (3x64x16x49x32xf32, 1xi64, 1xi64) + slice_6 = paddle._C_ops.slice( + transpose_38, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (64x16x49x32xf32) <- (64x16x49x32xf32, 1xf32) + scale_6 = paddle._C_ops.scale(slice_36, full_0, float("0"), True) + del slice_36 + + # pd_op.transpose: (64x16x32x49xf32) <- (64x16x49x32xf32) + transpose_39 = paddle._C_ops.transpose(slice_37, [0, 1, 3, 2]) + del slice_37 + + # pd_op.matmul: (64x16x49x49xf32) <- (64x16x49x32xf32, 64x16x32x49xf32) + matmul_33 = paddle._C_ops.matmul(scale_6, transpose_39, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_59 = paddle._C_ops.reshape(data_31, full_int_array_15) + del data_31 + + # pd_op.index_select: (2401x16xf32) <- (169x16xf32, 2401xi64) + index_select_6 = paddle._C_ops.index_select(data_32, reshape_59, 0) + del data_32 + + # pd_op.reshape: (49x49x16xf32) <- (2401x16xf32, 3xi64) + reshape_251 = paddle._C_ops.reshape(index_select_6, full_int_array_16) + + # pd_op.transpose: (16x49x49xf32) <- (49x49x16xf32) + transpose_40 = paddle._C_ops.transpose(reshape_251, [2, 0, 1]) + del reshape_251 + + # pd_op.unsqueeze: (1x16x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_9 = paddle._C_ops.unsqueeze(transpose_40, full_int_array_0) + + # pd_op.add: (64x16x49x49xf32) <- (64x16x49x49xf32, 1x16x49x49xf32) + add_183 = paddle._C_ops.add(matmul_33, unsqueeze_9) + + # pd_op.softmax: (64x16x49x49xf32) <- (64x16x49x49xf32) + softmax_6 = paddle._C_ops.softmax(add_183, -1) + del add_183 + + # pd_op.matmul: (64x16x49x32xf32) <- (64x16x49x49xf32, 64x16x49x32xf32) + matmul_130 = paddle._C_ops.matmul(softmax_6, slice_6, False, False) + + # pd_op.transpose: (64x49x16x32xf32) <- (64x16x49x32xf32) + transpose_41 = paddle._C_ops.transpose(matmul_130, [0, 2, 1, 3]) + del matmul_130 + + # pd_op.reshape: (64x49x512xf32) <- (64x49x16x32xf32, 3xi64) + reshape_60 = paddle._C_ops.reshape(transpose_41, full_int_array_60) + + # pd_op.matmul: (64x49x512xf32) <- (64x49x512xf32, 512x512xf32) + matmul_34 = paddle._C_ops.matmul(reshape_60, parameter_218, False, False) + del parameter_218 + + # pd_op.add: (64x49x512xf32) <- (64x49x512xf32, 512xf32) + add_44 = paddle._C_ops.add(matmul_34, parameter_217) + del parameter_217 + + # pd_op.reshape: (64x7x7x512xf32) <- (64x49x512xf32, 4xi64) + reshape_61 = paddle._C_ops.reshape(add_44, full_int_array_57) + + # pd_op.reshape: (16x2x2x7x7x512xf32) <- (64x7x7x512xf32, 6xi64) + reshape_252 = paddle._C_ops.reshape(reshape_61, full_int_array_61) + + # pd_op.transpose: (16x2x7x2x7x512xf32) <- (16x2x2x7x7x512xf32) + transpose_42 = paddle._C_ops.transpose(reshape_252, [0, 1, 3, 2, 4, 5]) + del reshape_252 + + # pd_op.reshape: (16x14x14x512xf32) <- (16x2x7x2x7x512xf32, 4xi64) + reshape_62 = paddle._C_ops.reshape(transpose_42, full_int_array_62) + + # pd_op.reshape: (16x196x512xf32) <- (16x14x14x512xf32, 3xi64) + reshape_63 = paddle._C_ops.reshape(reshape_62, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_7 = paddle._C_ops.full( + [], + float("0.869565"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_82 = full_7 + + # pd_op.uniform: (16x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_10 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (16x1x1xf32) <- (xf32, 16x1x1xf32) + add_184 = paddle._C_ops.add(full_7, uniform_10) + del uniform_10 + + # pd_op.floor: (16x1x1xf32) <- (16x1x1xf32) + floor_10 = paddle._C_ops.floor(add_184) + del add_184 + + # pd_op.divide: (16x196x512xf32) <- (16x196x512xf32, xf32) + divide_10 = paddle._C_ops.divide(reshape_63, full_7) + + # pd_op.multiply: (16x196x512xf32) <- (16x196x512xf32, 16x1x1xf32) + multiply_10 = paddle._C_ops.multiply(divide_10, floor_10) + + # pd_op.add: (16x196x512xf32) <- (16x196x512xf32, 16x196x512xf32) + add_45 = paddle._C_ops.add(add_42, multiply_10) + + # pd_op.layer_norm: (16x196x512xf32, 16x196xf32, 16x196xf32) <- (16x196x512xf32, 512xf32, 512xf32) + layer_norm_48, layer_norm_49, layer_norm_50 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_45, parameter_216, parameter_215, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_215, parameter_216 + + # pd_op.matmul: (16x196x2048xf32) <- (16x196x512xf32, 512x2048xf32) + matmul_35 = paddle._C_ops.matmul(layer_norm_48, parameter_214, False, False) + del parameter_214 + + # pd_op.add: (16x196x2048xf32) <- (16x196x2048xf32, 2048xf32) + add_46 = paddle._C_ops.add(matmul_35, parameter_213) + del parameter_213 + + # pd_op.gelu: (16x196x2048xf32) <- (16x196x2048xf32) + gelu_6 = paddle._C_ops.gelu(add_46, False) + + # pd_op.matmul: (16x196x512xf32) <- (16x196x2048xf32, 2048x512xf32) + matmul_36 = paddle._C_ops.matmul(gelu_6, parameter_212, False, False) + del parameter_212 + + # pd_op.add: (16x196x512xf32) <- (16x196x512xf32, 512xf32) + add_47 = paddle._C_ops.add(matmul_36, parameter_211) + del parameter_211 + + # pd_op.uniform: (16x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_11 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (16x1x1xf32) <- (xf32, 16x1x1xf32) + add_185 = paddle._C_ops.add(full_7, uniform_11) + del uniform_11 + + # pd_op.floor: (16x1x1xf32) <- (16x1x1xf32) + floor_11 = paddle._C_ops.floor(add_185) + del add_185 + + # pd_op.divide: (16x196x512xf32) <- (16x196x512xf32, xf32) + divide_11 = paddle._C_ops.divide(add_47, full_7) + + # pd_op.multiply: (16x196x512xf32) <- (16x196x512xf32, 16x1x1xf32) + multiply_11 = paddle._C_ops.multiply(divide_11, floor_11) + + # pd_op.add: (16x196x512xf32) <- (16x196x512xf32, 16x196x512xf32) + add_48 = paddle._C_ops.add(add_45, multiply_11) + + # pd_op.layer_norm: (16x196x512xf32, 16x196xf32, 16x196xf32) <- (16x196x512xf32, 512xf32, 512xf32) + layer_norm_51, layer_norm_52, layer_norm_53 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_48, parameter_210, parameter_209, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_209, parameter_210 + + # pd_op.reshape: (16x14x14x512xf32) <- (16x196x512xf32, 4xi64) + reshape_64 = paddle._C_ops.reshape(layer_norm_51, full_int_array_55) + + # pd_op.roll: (16x14x14x512xf32) <- (16x14x14x512xf32, 2xi64) + roll_6 = paddle._C_ops.roll(reshape_64, full_int_array_4, [1, 2]) + + # pd_op.reshape: (16x2x7x2x7x512xf32) <- (16x14x14x512xf32, 6xi64) + reshape_253 = paddle._C_ops.reshape(roll_6, full_int_array_56) + + # pd_op.transpose: (16x2x2x7x7x512xf32) <- (16x2x7x2x7x512xf32) + transpose_43 = paddle._C_ops.transpose(reshape_253, [0, 1, 3, 2, 4, 5]) + del reshape_253 + + # pd_op.reshape: (64x7x7x512xf32) <- (16x2x2x7x7x512xf32, 4xi64) + reshape_65 = paddle._C_ops.reshape(transpose_43, full_int_array_57) + + # pd_op.reshape: (64x49x512xf32) <- (64x7x7x512xf32, 3xi64) + reshape_66 = paddle._C_ops.reshape(reshape_65, full_int_array_58) + + # pd_op.full: (1x14x14x1xf32) <- () + full_37 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__36 = paddle._C_ops.set_value_( + full_37, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_37 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__37 = paddle._C_ops.set_value_( + set_value__36, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__36 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__38 = paddle._C_ops.set_value_( + set_value__37, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__37 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__39 = paddle._C_ops.set_value_( + set_value__38, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__38 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__40 = paddle._C_ops.set_value_( + set_value__39, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__39 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__41 = paddle._C_ops.set_value_( + set_value__40, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__40 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__42 = paddle._C_ops.set_value_( + set_value__41, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__41 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__43 = paddle._C_ops.set_value_( + set_value__42, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__42 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__3 = paddle._C_ops.set_value_( + set_value__43, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__43 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_254 = paddle._C_ops.reshape(set_value__3, full_int_array_64) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_149 = paddle._C_ops.transpose(reshape_254, [0, 1, 3, 2, 4, 5]) + del reshape_254 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_255 = paddle._C_ops.reshape(transpose_149, full_int_array_36) + del transpose_149 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_256 = paddle._C_ops.reshape(reshape_255, full_int_array_37) + del reshape_255 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_46 = paddle._C_ops.unsqueeze(reshape_256, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_47 = paddle._C_ops.unsqueeze(reshape_256, full_int_array_2) + del reshape_256 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_3 = paddle._C_ops.subtract(unsqueeze_46, unsqueeze_47) + del unsqueeze_46, unsqueeze_47 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_3 = paddle._C_ops.not_equal(subtract_3, full_26) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_6 = paddle._C_ops.where(not_equal_3, full_35, subtract_3) + del not_equal_3, subtract_3 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_3 = paddle._C_ops.equal(where_6, full_26) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_7 = paddle._C_ops.where(equal_3, full_36, where_6) + del equal_3, where_6 + + # pd_op.matmul: (64x49x1536xf32) <- (64x49x512xf32, 512x1536xf32) + matmul_37 = paddle._C_ops.matmul(reshape_66, parameter_208, False, False) + del parameter_208 + + # pd_op.add: (64x49x1536xf32) <- (64x49x1536xf32, 1536xf32) + add_49 = paddle._C_ops.add(matmul_37, parameter_207) + del parameter_207 + + # pd_op.reshape: (64x49x3x16x32xf32) <- (64x49x1536xf32, 5xi64) + reshape_257 = paddle._C_ops.reshape(add_49, full_int_array_59) + + # pd_op.transpose: (3x64x16x49x32xf32) <- (64x49x3x16x32xf32) + transpose_44 = paddle._C_ops.transpose(reshape_257, [2, 0, 3, 1, 4]) + del reshape_257 + + # pd_op.slice: (64x16x49x32xf32) <- (3x64x16x49x32xf32, 1xi64, 1xi64) + slice_38 = paddle._C_ops.slice( + transpose_44, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (64x16x49x32xf32) <- (3x64x16x49x32xf32, 1xi64, 1xi64) + slice_39 = paddle._C_ops.slice( + transpose_44, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (64x16x49x32xf32) <- (3x64x16x49x32xf32, 1xi64, 1xi64) + slice_7 = paddle._C_ops.slice( + transpose_44, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (64x16x49x32xf32) <- (64x16x49x32xf32, 1xf32) + scale_7 = paddle._C_ops.scale(slice_38, full_0, float("0"), True) + del slice_38 + + # pd_op.transpose: (64x16x32x49xf32) <- (64x16x49x32xf32) + transpose_45 = paddle._C_ops.transpose(slice_39, [0, 1, 3, 2]) + del slice_39 + + # pd_op.matmul: (64x16x49x49xf32) <- (64x16x49x32xf32, 64x16x32x49xf32) + matmul_38 = paddle._C_ops.matmul(scale_7, transpose_45, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_67 = paddle._C_ops.reshape(data_33, full_int_array_15) + del data_33 + + # pd_op.index_select: (2401x16xf32) <- (169x16xf32, 2401xi64) + index_select_7 = paddle._C_ops.index_select(data_34, reshape_67, 0) + del data_34 + + # pd_op.reshape: (49x49x16xf32) <- (2401x16xf32, 3xi64) + reshape_258 = paddle._C_ops.reshape(index_select_7, full_int_array_16) + + # pd_op.transpose: (16x49x49xf32) <- (49x49x16xf32) + transpose_46 = paddle._C_ops.transpose(reshape_258, [2, 0, 1]) + del reshape_258 + + # pd_op.unsqueeze: (1x16x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_10 = paddle._C_ops.unsqueeze(transpose_46, full_int_array_0) + + # pd_op.add: (64x16x49x49xf32) <- (64x16x49x49xf32, 1x16x49x49xf32) + add_50 = paddle._C_ops.add(matmul_38, unsqueeze_10) + + # pd_op.reshape: (16x4x16x49x49xf32) <- (64x16x49x49xf32, 5xi64) + reshape_68 = paddle._C_ops.reshape(add_50, full_int_array_65) + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_48 = paddle._C_ops.unsqueeze(where_7, full_int_array_1) + del where_7 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_11 = paddle._C_ops.unsqueeze(unsqueeze_48, full_int_array_0) + del unsqueeze_48 + + # pd_op.add: (16x4x16x49x49xf32) <- (16x4x16x49x49xf32, 1x4x1x49x49xf32) + add_51 = paddle._C_ops.add(reshape_68, unsqueeze_11) + + # pd_op.reshape: (64x16x49x49xf32) <- (16x4x16x49x49xf32, 4xi64) + reshape_259 = paddle._C_ops.reshape(add_51, full_int_array_66) + + # pd_op.softmax: (64x16x49x49xf32) <- (64x16x49x49xf32) + softmax_7 = paddle._C_ops.softmax(reshape_259, -1) + del reshape_259 + + # pd_op.matmul: (64x16x49x32xf32) <- (64x16x49x49xf32, 64x16x49x32xf32) + matmul_131 = paddle._C_ops.matmul(softmax_7, slice_7, False, False) + + # pd_op.transpose: (64x49x16x32xf32) <- (64x16x49x32xf32) + transpose_47 = paddle._C_ops.transpose(matmul_131, [0, 2, 1, 3]) + del matmul_131 + + # pd_op.reshape: (64x49x512xf32) <- (64x49x16x32xf32, 3xi64) + reshape_69 = paddle._C_ops.reshape(transpose_47, full_int_array_60) + + # pd_op.matmul: (64x49x512xf32) <- (64x49x512xf32, 512x512xf32) + matmul_39 = paddle._C_ops.matmul(reshape_69, parameter_206, False, False) + del parameter_206 + + # pd_op.add: (64x49x512xf32) <- (64x49x512xf32, 512xf32) + add_52 = paddle._C_ops.add(matmul_39, parameter_205) + del parameter_205 + + # pd_op.reshape: (64x7x7x512xf32) <- (64x49x512xf32, 4xi64) + reshape_70 = paddle._C_ops.reshape(add_52, full_int_array_57) + + # pd_op.reshape: (16x2x2x7x7x512xf32) <- (64x7x7x512xf32, 6xi64) + reshape_260 = paddle._C_ops.reshape(reshape_70, full_int_array_61) + + # pd_op.transpose: (16x2x7x2x7x512xf32) <- (16x2x2x7x7x512xf32) + transpose_48 = paddle._C_ops.transpose(reshape_260, [0, 1, 3, 2, 4, 5]) + del reshape_260 + + # pd_op.reshape: (16x14x14x512xf32) <- (16x2x7x2x7x512xf32, 4xi64) + reshape_71 = paddle._C_ops.reshape(transpose_48, full_int_array_62) + + # pd_op.roll: (16x14x14x512xf32) <- (16x14x14x512xf32, 2xi64) + roll_7 = paddle._C_ops.roll(reshape_71, full_int_array_5, [1, 2]) + + # pd_op.reshape: (16x196x512xf32) <- (16x14x14x512xf32, 3xi64) + reshape_72 = paddle._C_ops.reshape(roll_7, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_8 = paddle._C_ops.full( + [], + float("0.847826"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_93 = full_8 + + # pd_op.uniform: (16x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_12 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (16x1x1xf32) <- (xf32, 16x1x1xf32) + add_186 = paddle._C_ops.add(full_8, uniform_12) + del uniform_12 + + # pd_op.floor: (16x1x1xf32) <- (16x1x1xf32) + floor_12 = paddle._C_ops.floor(add_186) + del add_186 + + # pd_op.divide: (16x196x512xf32) <- (16x196x512xf32, xf32) + divide_12 = paddle._C_ops.divide(reshape_72, full_8) + + # pd_op.multiply: (16x196x512xf32) <- (16x196x512xf32, 16x1x1xf32) + multiply_12 = paddle._C_ops.multiply(divide_12, floor_12) + + # pd_op.add: (16x196x512xf32) <- (16x196x512xf32, 16x196x512xf32) + add_53 = paddle._C_ops.add(add_48, multiply_12) + + # pd_op.layer_norm: (16x196x512xf32, 16x196xf32, 16x196xf32) <- (16x196x512xf32, 512xf32, 512xf32) + layer_norm_54, layer_norm_55, layer_norm_56 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_53, parameter_204, parameter_203, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_203, parameter_204 + + # pd_op.matmul: (16x196x2048xf32) <- (16x196x512xf32, 512x2048xf32) + matmul_40 = paddle._C_ops.matmul(layer_norm_54, parameter_202, False, False) + del parameter_202 + + # pd_op.add: (16x196x2048xf32) <- (16x196x2048xf32, 2048xf32) + add_54 = paddle._C_ops.add(matmul_40, parameter_201) + del parameter_201 + + # pd_op.gelu: (16x196x2048xf32) <- (16x196x2048xf32) + gelu_7 = paddle._C_ops.gelu(add_54, False) + + # pd_op.matmul: (16x196x512xf32) <- (16x196x2048xf32, 2048x512xf32) + matmul_41 = paddle._C_ops.matmul(gelu_7, parameter_200, False, False) + del parameter_200 + + # pd_op.add: (16x196x512xf32) <- (16x196x512xf32, 512xf32) + add_55 = paddle._C_ops.add(matmul_41, parameter_199) + del parameter_199 + + # pd_op.uniform: (16x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_13 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (16x1x1xf32) <- (xf32, 16x1x1xf32) + add_187 = paddle._C_ops.add(full_8, uniform_13) + del uniform_13 + + # pd_op.floor: (16x1x1xf32) <- (16x1x1xf32) + floor_13 = paddle._C_ops.floor(add_187) + del add_187 + + # pd_op.divide: (16x196x512xf32) <- (16x196x512xf32, xf32) + divide_13 = paddle._C_ops.divide(add_55, full_8) + + # pd_op.multiply: (16x196x512xf32) <- (16x196x512xf32, 16x1x1xf32) + multiply_13 = paddle._C_ops.multiply(divide_13, floor_13) + + # pd_op.add: (16x196x512xf32) <- (16x196x512xf32, 16x196x512xf32) + add_56 = paddle._C_ops.add(add_53, multiply_13) + + # pd_op.layer_norm: (16x196x512xf32, 16x196xf32, 16x196xf32) <- (16x196x512xf32, 512xf32, 512xf32) + layer_norm_57, layer_norm_58, layer_norm_59 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_56, parameter_198, parameter_197, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_197, parameter_198 + + # pd_op.reshape: (16x14x14x512xf32) <- (16x196x512xf32, 4xi64) + reshape_73 = paddle._C_ops.reshape(layer_norm_57, full_int_array_55) + + # pd_op.reshape: (16x2x7x2x7x512xf32) <- (16x14x14x512xf32, 6xi64) + reshape_261 = paddle._C_ops.reshape(reshape_73, full_int_array_56) + + # pd_op.transpose: (16x2x2x7x7x512xf32) <- (16x2x7x2x7x512xf32) + transpose_49 = paddle._C_ops.transpose(reshape_261, [0, 1, 3, 2, 4, 5]) + del reshape_261 + + # pd_op.reshape: (64x7x7x512xf32) <- (16x2x2x7x7x512xf32, 4xi64) + reshape_74 = paddle._C_ops.reshape(transpose_49, full_int_array_57) + + # pd_op.reshape: (64x49x512xf32) <- (64x7x7x512xf32, 3xi64) + reshape_75 = paddle._C_ops.reshape(reshape_74, full_int_array_58) + + # pd_op.matmul: (64x49x1536xf32) <- (64x49x512xf32, 512x1536xf32) + matmul_42 = paddle._C_ops.matmul(reshape_75, parameter_196, False, False) + del parameter_196 + + # pd_op.add: (64x49x1536xf32) <- (64x49x1536xf32, 1536xf32) + add_57 = paddle._C_ops.add(matmul_42, parameter_195) + del parameter_195 + + # pd_op.reshape: (64x49x3x16x32xf32) <- (64x49x1536xf32, 5xi64) + reshape_262 = paddle._C_ops.reshape(add_57, full_int_array_59) + + # pd_op.transpose: (3x64x16x49x32xf32) <- (64x49x3x16x32xf32) + transpose_50 = paddle._C_ops.transpose(reshape_262, [2, 0, 3, 1, 4]) + del reshape_262 + + # pd_op.slice: (64x16x49x32xf32) <- (3x64x16x49x32xf32, 1xi64, 1xi64) + slice_40 = paddle._C_ops.slice( + transpose_50, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (64x16x49x32xf32) <- (3x64x16x49x32xf32, 1xi64, 1xi64) + slice_41 = paddle._C_ops.slice( + transpose_50, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (64x16x49x32xf32) <- (3x64x16x49x32xf32, 1xi64, 1xi64) + slice_8 = paddle._C_ops.slice( + transpose_50, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (64x16x49x32xf32) <- (64x16x49x32xf32, 1xf32) + scale_8 = paddle._C_ops.scale(slice_40, full_0, float("0"), True) + del slice_40 + + # pd_op.transpose: (64x16x32x49xf32) <- (64x16x49x32xf32) + transpose_51 = paddle._C_ops.transpose(slice_41, [0, 1, 3, 2]) + del slice_41 + + # pd_op.matmul: (64x16x49x49xf32) <- (64x16x49x32xf32, 64x16x32x49xf32) + matmul_43 = paddle._C_ops.matmul(scale_8, transpose_51, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_76 = paddle._C_ops.reshape(data_35, full_int_array_15) + del data_35 + + # pd_op.index_select: (2401x16xf32) <- (169x16xf32, 2401xi64) + index_select_8 = paddle._C_ops.index_select(data_36, reshape_76, 0) + del data_36 + + # pd_op.reshape: (49x49x16xf32) <- (2401x16xf32, 3xi64) + reshape_263 = paddle._C_ops.reshape(index_select_8, full_int_array_16) + + # pd_op.transpose: (16x49x49xf32) <- (49x49x16xf32) + transpose_52 = paddle._C_ops.transpose(reshape_263, [2, 0, 1]) + del reshape_263 + + # pd_op.unsqueeze: (1x16x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_12 = paddle._C_ops.unsqueeze(transpose_52, full_int_array_0) + + # pd_op.add: (64x16x49x49xf32) <- (64x16x49x49xf32, 1x16x49x49xf32) + add_188 = paddle._C_ops.add(matmul_43, unsqueeze_12) + + # pd_op.softmax: (64x16x49x49xf32) <- (64x16x49x49xf32) + softmax_8 = paddle._C_ops.softmax(add_188, -1) + del add_188 + + # pd_op.matmul: (64x16x49x32xf32) <- (64x16x49x49xf32, 64x16x49x32xf32) + matmul_132 = paddle._C_ops.matmul(softmax_8, slice_8, False, False) + + # pd_op.transpose: (64x49x16x32xf32) <- (64x16x49x32xf32) + transpose_53 = paddle._C_ops.transpose(matmul_132, [0, 2, 1, 3]) + del matmul_132 + + # pd_op.reshape: (64x49x512xf32) <- (64x49x16x32xf32, 3xi64) + reshape_77 = paddle._C_ops.reshape(transpose_53, full_int_array_60) + + # pd_op.matmul: (64x49x512xf32) <- (64x49x512xf32, 512x512xf32) + matmul_44 = paddle._C_ops.matmul(reshape_77, parameter_194, False, False) + del parameter_194 + + # pd_op.add: (64x49x512xf32) <- (64x49x512xf32, 512xf32) + add_58 = paddle._C_ops.add(matmul_44, parameter_193) + del parameter_193 + + # pd_op.reshape: (64x7x7x512xf32) <- (64x49x512xf32, 4xi64) + reshape_78 = paddle._C_ops.reshape(add_58, full_int_array_57) + + # pd_op.reshape: (16x2x2x7x7x512xf32) <- (64x7x7x512xf32, 6xi64) + reshape_264 = paddle._C_ops.reshape(reshape_78, full_int_array_61) + + # pd_op.transpose: (16x2x7x2x7x512xf32) <- (16x2x2x7x7x512xf32) + transpose_54 = paddle._C_ops.transpose(reshape_264, [0, 1, 3, 2, 4, 5]) + del reshape_264 + + # pd_op.reshape: (16x14x14x512xf32) <- (16x2x7x2x7x512xf32, 4xi64) + reshape_79 = paddle._C_ops.reshape(transpose_54, full_int_array_62) + + # pd_op.reshape: (16x196x512xf32) <- (16x14x14x512xf32, 3xi64) + reshape_80 = paddle._C_ops.reshape(reshape_79, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_9 = paddle._C_ops.full( + [], + float("0.826087"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_102 = full_9 + + # pd_op.uniform: (16x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_14 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (16x1x1xf32) <- (xf32, 16x1x1xf32) + add_189 = paddle._C_ops.add(full_9, uniform_14) + del uniform_14 + + # pd_op.floor: (16x1x1xf32) <- (16x1x1xf32) + floor_14 = paddle._C_ops.floor(add_189) + del add_189 + + # pd_op.divide: (16x196x512xf32) <- (16x196x512xf32, xf32) + divide_14 = paddle._C_ops.divide(reshape_80, full_9) + + # pd_op.multiply: (16x196x512xf32) <- (16x196x512xf32, 16x1x1xf32) + multiply_14 = paddle._C_ops.multiply(divide_14, floor_14) + + # pd_op.add: (16x196x512xf32) <- (16x196x512xf32, 16x196x512xf32) + add_59 = paddle._C_ops.add(add_56, multiply_14) + + # pd_op.layer_norm: (16x196x512xf32, 16x196xf32, 16x196xf32) <- (16x196x512xf32, 512xf32, 512xf32) + layer_norm_60, layer_norm_61, layer_norm_62 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_59, parameter_192, parameter_191, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_191, parameter_192 + + # pd_op.matmul: (16x196x2048xf32) <- (16x196x512xf32, 512x2048xf32) + matmul_45 = paddle._C_ops.matmul(layer_norm_60, parameter_190, False, False) + del parameter_190 + + # pd_op.add: (16x196x2048xf32) <- (16x196x2048xf32, 2048xf32) + add_60 = paddle._C_ops.add(matmul_45, parameter_189) + del parameter_189 + + # pd_op.gelu: (16x196x2048xf32) <- (16x196x2048xf32) + gelu_8 = paddle._C_ops.gelu(add_60, False) + + # pd_op.matmul: (16x196x512xf32) <- (16x196x2048xf32, 2048x512xf32) + matmul_46 = paddle._C_ops.matmul(gelu_8, parameter_188, False, False) + del parameter_188 + + # pd_op.add: (16x196x512xf32) <- (16x196x512xf32, 512xf32) + add_61 = paddle._C_ops.add(matmul_46, parameter_187) + del parameter_187 + + # pd_op.uniform: (16x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_15 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (16x1x1xf32) <- (xf32, 16x1x1xf32) + add_190 = paddle._C_ops.add(full_9, uniform_15) + del uniform_15 + + # pd_op.floor: (16x1x1xf32) <- (16x1x1xf32) + floor_15 = paddle._C_ops.floor(add_190) + del add_190 + + # pd_op.divide: (16x196x512xf32) <- (16x196x512xf32, xf32) + divide_15 = paddle._C_ops.divide(add_61, full_9) + + # pd_op.multiply: (16x196x512xf32) <- (16x196x512xf32, 16x1x1xf32) + multiply_15 = paddle._C_ops.multiply(divide_15, floor_15) + + # pd_op.add: (16x196x512xf32) <- (16x196x512xf32, 16x196x512xf32) + add_62 = paddle._C_ops.add(add_59, multiply_15) + + # pd_op.layer_norm: (16x196x512xf32, 16x196xf32, 16x196xf32) <- (16x196x512xf32, 512xf32, 512xf32) + layer_norm_63, layer_norm_64, layer_norm_65 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_62, parameter_186, parameter_185, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_185, parameter_186 + + # pd_op.reshape: (16x14x14x512xf32) <- (16x196x512xf32, 4xi64) + reshape_81 = paddle._C_ops.reshape(layer_norm_63, full_int_array_55) + + # pd_op.roll: (16x14x14x512xf32) <- (16x14x14x512xf32, 2xi64) + roll_8 = paddle._C_ops.roll(reshape_81, full_int_array_4, [1, 2]) + + # pd_op.reshape: (16x2x7x2x7x512xf32) <- (16x14x14x512xf32, 6xi64) + reshape_265 = paddle._C_ops.reshape(roll_8, full_int_array_56) + + # pd_op.transpose: (16x2x2x7x7x512xf32) <- (16x2x7x2x7x512xf32) + transpose_55 = paddle._C_ops.transpose(reshape_265, [0, 1, 3, 2, 4, 5]) + del reshape_265 + + # pd_op.reshape: (64x7x7x512xf32) <- (16x2x2x7x7x512xf32, 4xi64) + reshape_82 = paddle._C_ops.reshape(transpose_55, full_int_array_57) + + # pd_op.reshape: (64x49x512xf32) <- (64x7x7x512xf32, 3xi64) + reshape_83 = paddle._C_ops.reshape(reshape_82, full_int_array_58) + + # pd_op.full: (1x14x14x1xf32) <- () + full_38 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__44 = paddle._C_ops.set_value_( + full_38, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_38 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__45 = paddle._C_ops.set_value_( + set_value__44, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__44 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__46 = paddle._C_ops.set_value_( + set_value__45, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__45 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__47 = paddle._C_ops.set_value_( + set_value__46, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__46 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__48 = paddle._C_ops.set_value_( + set_value__47, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__47 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__49 = paddle._C_ops.set_value_( + set_value__48, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__48 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__50 = paddle._C_ops.set_value_( + set_value__49, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__49 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__51 = paddle._C_ops.set_value_( + set_value__50, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__50 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__4 = paddle._C_ops.set_value_( + set_value__51, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__51 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_266 = paddle._C_ops.reshape(set_value__4, full_int_array_64) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_150 = paddle._C_ops.transpose(reshape_266, [0, 1, 3, 2, 4, 5]) + del reshape_266 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_267 = paddle._C_ops.reshape(transpose_150, full_int_array_36) + del transpose_150 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_268 = paddle._C_ops.reshape(reshape_267, full_int_array_37) + del reshape_267 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_49 = paddle._C_ops.unsqueeze(reshape_268, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_50 = paddle._C_ops.unsqueeze(reshape_268, full_int_array_2) + del reshape_268 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_4 = paddle._C_ops.subtract(unsqueeze_49, unsqueeze_50) + del unsqueeze_49, unsqueeze_50 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_4 = paddle._C_ops.not_equal(subtract_4, full_26) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_8 = paddle._C_ops.where(not_equal_4, full_35, subtract_4) + del not_equal_4, subtract_4 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_4 = paddle._C_ops.equal(where_8, full_26) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_9 = paddle._C_ops.where(equal_4, full_36, where_8) + del equal_4, where_8 + + # pd_op.matmul: (64x49x1536xf32) <- (64x49x512xf32, 512x1536xf32) + matmul_47 = paddle._C_ops.matmul(reshape_83, parameter_184, False, False) + del parameter_184 + + # pd_op.add: (64x49x1536xf32) <- (64x49x1536xf32, 1536xf32) + add_63 = paddle._C_ops.add(matmul_47, parameter_183) + del parameter_183 + + # pd_op.reshape: (64x49x3x16x32xf32) <- (64x49x1536xf32, 5xi64) + reshape_269 = paddle._C_ops.reshape(add_63, full_int_array_59) + + # pd_op.transpose: (3x64x16x49x32xf32) <- (64x49x3x16x32xf32) + transpose_56 = paddle._C_ops.transpose(reshape_269, [2, 0, 3, 1, 4]) + del reshape_269 + + # pd_op.slice: (64x16x49x32xf32) <- (3x64x16x49x32xf32, 1xi64, 1xi64) + slice_42 = paddle._C_ops.slice( + transpose_56, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (64x16x49x32xf32) <- (3x64x16x49x32xf32, 1xi64, 1xi64) + slice_43 = paddle._C_ops.slice( + transpose_56, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (64x16x49x32xf32) <- (3x64x16x49x32xf32, 1xi64, 1xi64) + slice_9 = paddle._C_ops.slice( + transpose_56, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (64x16x49x32xf32) <- (64x16x49x32xf32, 1xf32) + scale_9 = paddle._C_ops.scale(slice_42, full_0, float("0"), True) + del slice_42 + + # pd_op.transpose: (64x16x32x49xf32) <- (64x16x49x32xf32) + transpose_57 = paddle._C_ops.transpose(slice_43, [0, 1, 3, 2]) + del slice_43 + + # pd_op.matmul: (64x16x49x49xf32) <- (64x16x49x32xf32, 64x16x32x49xf32) + matmul_48 = paddle._C_ops.matmul(scale_9, transpose_57, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_84 = paddle._C_ops.reshape(data_37, full_int_array_15) + del data_37 + + # pd_op.index_select: (2401x16xf32) <- (169x16xf32, 2401xi64) + index_select_9 = paddle._C_ops.index_select(data_38, reshape_84, 0) + del data_38 + + # pd_op.reshape: (49x49x16xf32) <- (2401x16xf32, 3xi64) + reshape_270 = paddle._C_ops.reshape(index_select_9, full_int_array_16) + + # pd_op.transpose: (16x49x49xf32) <- (49x49x16xf32) + transpose_58 = paddle._C_ops.transpose(reshape_270, [2, 0, 1]) + del reshape_270 + + # pd_op.unsqueeze: (1x16x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_13 = paddle._C_ops.unsqueeze(transpose_58, full_int_array_0) + + # pd_op.add: (64x16x49x49xf32) <- (64x16x49x49xf32, 1x16x49x49xf32) + add_64 = paddle._C_ops.add(matmul_48, unsqueeze_13) + + # pd_op.reshape: (16x4x16x49x49xf32) <- (64x16x49x49xf32, 5xi64) + reshape_85 = paddle._C_ops.reshape(add_64, full_int_array_65) + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_51 = paddle._C_ops.unsqueeze(where_9, full_int_array_1) + del where_9 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_14 = paddle._C_ops.unsqueeze(unsqueeze_51, full_int_array_0) + del unsqueeze_51 + + # pd_op.add: (16x4x16x49x49xf32) <- (16x4x16x49x49xf32, 1x4x1x49x49xf32) + add_65 = paddle._C_ops.add(reshape_85, unsqueeze_14) + + # pd_op.reshape: (64x16x49x49xf32) <- (16x4x16x49x49xf32, 4xi64) + reshape_271 = paddle._C_ops.reshape(add_65, full_int_array_66) + + # pd_op.softmax: (64x16x49x49xf32) <- (64x16x49x49xf32) + softmax_9 = paddle._C_ops.softmax(reshape_271, -1) + del reshape_271 + + # pd_op.matmul: (64x16x49x32xf32) <- (64x16x49x49xf32, 64x16x49x32xf32) + matmul_133 = paddle._C_ops.matmul(softmax_9, slice_9, False, False) + + # pd_op.transpose: (64x49x16x32xf32) <- (64x16x49x32xf32) + transpose_59 = paddle._C_ops.transpose(matmul_133, [0, 2, 1, 3]) + del matmul_133 + + # pd_op.reshape: (64x49x512xf32) <- (64x49x16x32xf32, 3xi64) + reshape_86 = paddle._C_ops.reshape(transpose_59, full_int_array_60) + + # pd_op.matmul: (64x49x512xf32) <- (64x49x512xf32, 512x512xf32) + matmul_49 = paddle._C_ops.matmul(reshape_86, parameter_182, False, False) + del parameter_182 + + # pd_op.add: (64x49x512xf32) <- (64x49x512xf32, 512xf32) + add_66 = paddle._C_ops.add(matmul_49, parameter_181) + del parameter_181 + + # pd_op.reshape: (64x7x7x512xf32) <- (64x49x512xf32, 4xi64) + reshape_87 = paddle._C_ops.reshape(add_66, full_int_array_57) + + # pd_op.reshape: (16x2x2x7x7x512xf32) <- (64x7x7x512xf32, 6xi64) + reshape_272 = paddle._C_ops.reshape(reshape_87, full_int_array_61) + + # pd_op.transpose: (16x2x7x2x7x512xf32) <- (16x2x2x7x7x512xf32) + transpose_60 = paddle._C_ops.transpose(reshape_272, [0, 1, 3, 2, 4, 5]) + del reshape_272 + + # pd_op.reshape: (16x14x14x512xf32) <- (16x2x7x2x7x512xf32, 4xi64) + reshape_88 = paddle._C_ops.reshape(transpose_60, full_int_array_62) + + # pd_op.roll: (16x14x14x512xf32) <- (16x14x14x512xf32, 2xi64) + roll_9 = paddle._C_ops.roll(reshape_88, full_int_array_5, [1, 2]) + + # pd_op.reshape: (16x196x512xf32) <- (16x14x14x512xf32, 3xi64) + reshape_89 = paddle._C_ops.reshape(roll_9, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_10 = paddle._C_ops.full( + [], + float("0.804348"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_113 = full_10 + + # pd_op.uniform: (16x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_16 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (16x1x1xf32) <- (xf32, 16x1x1xf32) + add_191 = paddle._C_ops.add(full_10, uniform_16) + del uniform_16 + + # pd_op.floor: (16x1x1xf32) <- (16x1x1xf32) + floor_16 = paddle._C_ops.floor(add_191) + del add_191 + + # pd_op.divide: (16x196x512xf32) <- (16x196x512xf32, xf32) + divide_16 = paddle._C_ops.divide(reshape_89, full_10) + + # pd_op.multiply: (16x196x512xf32) <- (16x196x512xf32, 16x1x1xf32) + multiply_16 = paddle._C_ops.multiply(divide_16, floor_16) + + # pd_op.add: (16x196x512xf32) <- (16x196x512xf32, 16x196x512xf32) + add_67 = paddle._C_ops.add(add_62, multiply_16) + + # pd_op.layer_norm: (16x196x512xf32, 16x196xf32, 16x196xf32) <- (16x196x512xf32, 512xf32, 512xf32) + layer_norm_66, layer_norm_67, layer_norm_68 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_67, parameter_180, parameter_179, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_179, parameter_180 + + # pd_op.matmul: (16x196x2048xf32) <- (16x196x512xf32, 512x2048xf32) + matmul_50 = paddle._C_ops.matmul(layer_norm_66, parameter_178, False, False) + del parameter_178 + + # pd_op.add: (16x196x2048xf32) <- (16x196x2048xf32, 2048xf32) + add_68 = paddle._C_ops.add(matmul_50, parameter_177) + del parameter_177 + + # pd_op.gelu: (16x196x2048xf32) <- (16x196x2048xf32) + gelu_9 = paddle._C_ops.gelu(add_68, False) + + # pd_op.matmul: (16x196x512xf32) <- (16x196x2048xf32, 2048x512xf32) + matmul_51 = paddle._C_ops.matmul(gelu_9, parameter_176, False, False) + del parameter_176 + + # pd_op.add: (16x196x512xf32) <- (16x196x512xf32, 512xf32) + add_69 = paddle._C_ops.add(matmul_51, parameter_175) + del parameter_175 + + # pd_op.uniform: (16x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_17 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (16x1x1xf32) <- (xf32, 16x1x1xf32) + add_192 = paddle._C_ops.add(full_10, uniform_17) + del uniform_17 + + # pd_op.floor: (16x1x1xf32) <- (16x1x1xf32) + floor_17 = paddle._C_ops.floor(add_192) + del add_192 + + # pd_op.divide: (16x196x512xf32) <- (16x196x512xf32, xf32) + divide_17 = paddle._C_ops.divide(add_69, full_10) + + # pd_op.multiply: (16x196x512xf32) <- (16x196x512xf32, 16x1x1xf32) + multiply_17 = paddle._C_ops.multiply(divide_17, floor_17) + + # pd_op.add: (16x196x512xf32) <- (16x196x512xf32, 16x196x512xf32) + add_70 = paddle._C_ops.add(add_67, multiply_17) + + # pd_op.layer_norm: (16x196x512xf32, 16x196xf32, 16x196xf32) <- (16x196x512xf32, 512xf32, 512xf32) + layer_norm_69, layer_norm_70, layer_norm_71 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_70, parameter_174, parameter_173, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_173, parameter_174 + + # pd_op.reshape: (16x14x14x512xf32) <- (16x196x512xf32, 4xi64) + reshape_90 = paddle._C_ops.reshape(layer_norm_69, full_int_array_55) + + # pd_op.reshape: (16x2x7x2x7x512xf32) <- (16x14x14x512xf32, 6xi64) + reshape_273 = paddle._C_ops.reshape(reshape_90, full_int_array_56) + + # pd_op.transpose: (16x2x2x7x7x512xf32) <- (16x2x7x2x7x512xf32) + transpose_61 = paddle._C_ops.transpose(reshape_273, [0, 1, 3, 2, 4, 5]) + del reshape_273 + + # pd_op.reshape: (64x7x7x512xf32) <- (16x2x2x7x7x512xf32, 4xi64) + reshape_91 = paddle._C_ops.reshape(transpose_61, full_int_array_57) + + # pd_op.reshape: (64x49x512xf32) <- (64x7x7x512xf32, 3xi64) + reshape_92 = paddle._C_ops.reshape(reshape_91, full_int_array_58) + + # pd_op.matmul: (64x49x1536xf32) <- (64x49x512xf32, 512x1536xf32) + matmul_52 = paddle._C_ops.matmul(reshape_92, parameter_172, False, False) + del parameter_172 + + # pd_op.add: (64x49x1536xf32) <- (64x49x1536xf32, 1536xf32) + add_71 = paddle._C_ops.add(matmul_52, parameter_171) + del parameter_171 + + # pd_op.reshape: (64x49x3x16x32xf32) <- (64x49x1536xf32, 5xi64) + reshape_274 = paddle._C_ops.reshape(add_71, full_int_array_59) + + # pd_op.transpose: (3x64x16x49x32xf32) <- (64x49x3x16x32xf32) + transpose_62 = paddle._C_ops.transpose(reshape_274, [2, 0, 3, 1, 4]) + del reshape_274 + + # pd_op.slice: (64x16x49x32xf32) <- (3x64x16x49x32xf32, 1xi64, 1xi64) + slice_44 = paddle._C_ops.slice( + transpose_62, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (64x16x49x32xf32) <- (3x64x16x49x32xf32, 1xi64, 1xi64) + slice_45 = paddle._C_ops.slice( + transpose_62, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (64x16x49x32xf32) <- (3x64x16x49x32xf32, 1xi64, 1xi64) + slice_10 = paddle._C_ops.slice( + transpose_62, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (64x16x49x32xf32) <- (64x16x49x32xf32, 1xf32) + scale_10 = paddle._C_ops.scale(slice_44, full_0, float("0"), True) + del slice_44 + + # pd_op.transpose: (64x16x32x49xf32) <- (64x16x49x32xf32) + transpose_63 = paddle._C_ops.transpose(slice_45, [0, 1, 3, 2]) + del slice_45 + + # pd_op.matmul: (64x16x49x49xf32) <- (64x16x49x32xf32, 64x16x32x49xf32) + matmul_53 = paddle._C_ops.matmul(scale_10, transpose_63, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_93 = paddle._C_ops.reshape(data_39, full_int_array_15) + del data_39 + + # pd_op.index_select: (2401x16xf32) <- (169x16xf32, 2401xi64) + index_select_10 = paddle._C_ops.index_select(data_40, reshape_93, 0) + del data_40 + + # pd_op.reshape: (49x49x16xf32) <- (2401x16xf32, 3xi64) + reshape_275 = paddle._C_ops.reshape(index_select_10, full_int_array_16) + + # pd_op.transpose: (16x49x49xf32) <- (49x49x16xf32) + transpose_64 = paddle._C_ops.transpose(reshape_275, [2, 0, 1]) + del reshape_275 + + # pd_op.unsqueeze: (1x16x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_15 = paddle._C_ops.unsqueeze(transpose_64, full_int_array_0) + + # pd_op.add: (64x16x49x49xf32) <- (64x16x49x49xf32, 1x16x49x49xf32) + add_193 = paddle._C_ops.add(matmul_53, unsqueeze_15) + + # pd_op.softmax: (64x16x49x49xf32) <- (64x16x49x49xf32) + softmax_10 = paddle._C_ops.softmax(add_193, -1) + del add_193 + + # pd_op.matmul: (64x16x49x32xf32) <- (64x16x49x49xf32, 64x16x49x32xf32) + matmul_134 = paddle._C_ops.matmul(softmax_10, slice_10, False, False) + + # pd_op.transpose: (64x49x16x32xf32) <- (64x16x49x32xf32) + transpose_65 = paddle._C_ops.transpose(matmul_134, [0, 2, 1, 3]) + del matmul_134 + + # pd_op.reshape: (64x49x512xf32) <- (64x49x16x32xf32, 3xi64) + reshape_94 = paddle._C_ops.reshape(transpose_65, full_int_array_60) + + # pd_op.matmul: (64x49x512xf32) <- (64x49x512xf32, 512x512xf32) + matmul_54 = paddle._C_ops.matmul(reshape_94, parameter_170, False, False) + del parameter_170 + + # pd_op.add: (64x49x512xf32) <- (64x49x512xf32, 512xf32) + add_72 = paddle._C_ops.add(matmul_54, parameter_169) + del parameter_169 + + # pd_op.reshape: (64x7x7x512xf32) <- (64x49x512xf32, 4xi64) + reshape_95 = paddle._C_ops.reshape(add_72, full_int_array_57) + + # pd_op.reshape: (16x2x2x7x7x512xf32) <- (64x7x7x512xf32, 6xi64) + reshape_276 = paddle._C_ops.reshape(reshape_95, full_int_array_61) + + # pd_op.transpose: (16x2x7x2x7x512xf32) <- (16x2x2x7x7x512xf32) + transpose_66 = paddle._C_ops.transpose(reshape_276, [0, 1, 3, 2, 4, 5]) + del reshape_276 + + # pd_op.reshape: (16x14x14x512xf32) <- (16x2x7x2x7x512xf32, 4xi64) + reshape_96 = paddle._C_ops.reshape(transpose_66, full_int_array_62) + + # pd_op.reshape: (16x196x512xf32) <- (16x14x14x512xf32, 3xi64) + reshape_97 = paddle._C_ops.reshape(reshape_96, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_11 = paddle._C_ops.full( + [], + float("0.782609"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_122 = full_11 + + # pd_op.uniform: (16x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_18 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (16x1x1xf32) <- (xf32, 16x1x1xf32) + add_194 = paddle._C_ops.add(full_11, uniform_18) + del uniform_18 + + # pd_op.floor: (16x1x1xf32) <- (16x1x1xf32) + floor_18 = paddle._C_ops.floor(add_194) + del add_194 + + # pd_op.divide: (16x196x512xf32) <- (16x196x512xf32, xf32) + divide_18 = paddle._C_ops.divide(reshape_97, full_11) + + # pd_op.multiply: (16x196x512xf32) <- (16x196x512xf32, 16x1x1xf32) + multiply_18 = paddle._C_ops.multiply(divide_18, floor_18) + + # pd_op.add: (16x196x512xf32) <- (16x196x512xf32, 16x196x512xf32) + add_73 = paddle._C_ops.add(add_70, multiply_18) + + # pd_op.layer_norm: (16x196x512xf32, 16x196xf32, 16x196xf32) <- (16x196x512xf32, 512xf32, 512xf32) + layer_norm_72, layer_norm_73, layer_norm_74 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_73, parameter_168, parameter_167, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_167, parameter_168 + + # pd_op.matmul: (16x196x2048xf32) <- (16x196x512xf32, 512x2048xf32) + matmul_55 = paddle._C_ops.matmul(layer_norm_72, parameter_166, False, False) + del parameter_166 + + # pd_op.add: (16x196x2048xf32) <- (16x196x2048xf32, 2048xf32) + add_74 = paddle._C_ops.add(matmul_55, parameter_165) + del parameter_165 + + # pd_op.gelu: (16x196x2048xf32) <- (16x196x2048xf32) + gelu_10 = paddle._C_ops.gelu(add_74, False) + + # pd_op.matmul: (16x196x512xf32) <- (16x196x2048xf32, 2048x512xf32) + matmul_56 = paddle._C_ops.matmul(gelu_10, parameter_164, False, False) + del parameter_164 + + # pd_op.add: (16x196x512xf32) <- (16x196x512xf32, 512xf32) + add_75 = paddle._C_ops.add(matmul_56, parameter_163) + del parameter_163 + + # pd_op.uniform: (16x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_19 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (16x1x1xf32) <- (xf32, 16x1x1xf32) + add_195 = paddle._C_ops.add(full_11, uniform_19) + del uniform_19 + + # pd_op.floor: (16x1x1xf32) <- (16x1x1xf32) + floor_19 = paddle._C_ops.floor(add_195) + del add_195 + + # pd_op.divide: (16x196x512xf32) <- (16x196x512xf32, xf32) + divide_19 = paddle._C_ops.divide(add_75, full_11) + + # pd_op.multiply: (16x196x512xf32) <- (16x196x512xf32, 16x1x1xf32) + multiply_19 = paddle._C_ops.multiply(divide_19, floor_19) + + # pd_op.add: (16x196x512xf32) <- (16x196x512xf32, 16x196x512xf32) + add_76 = paddle._C_ops.add(add_73, multiply_19) + + # pd_op.layer_norm: (16x196x512xf32, 16x196xf32, 16x196xf32) <- (16x196x512xf32, 512xf32, 512xf32) + layer_norm_75, layer_norm_76, layer_norm_77 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_76, parameter_162, parameter_161, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_161, parameter_162 + + # pd_op.reshape: (16x14x14x512xf32) <- (16x196x512xf32, 4xi64) + reshape_98 = paddle._C_ops.reshape(layer_norm_75, full_int_array_55) + + # pd_op.roll: (16x14x14x512xf32) <- (16x14x14x512xf32, 2xi64) + roll_10 = paddle._C_ops.roll(reshape_98, full_int_array_4, [1, 2]) + + # pd_op.reshape: (16x2x7x2x7x512xf32) <- (16x14x14x512xf32, 6xi64) + reshape_277 = paddle._C_ops.reshape(roll_10, full_int_array_56) + + # pd_op.transpose: (16x2x2x7x7x512xf32) <- (16x2x7x2x7x512xf32) + transpose_67 = paddle._C_ops.transpose(reshape_277, [0, 1, 3, 2, 4, 5]) + del reshape_277 + + # pd_op.reshape: (64x7x7x512xf32) <- (16x2x2x7x7x512xf32, 4xi64) + reshape_99 = paddle._C_ops.reshape(transpose_67, full_int_array_57) + + # pd_op.reshape: (64x49x512xf32) <- (64x7x7x512xf32, 3xi64) + reshape_100 = paddle._C_ops.reshape(reshape_99, full_int_array_58) + + # pd_op.full: (1x14x14x1xf32) <- () + full_39 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__52 = paddle._C_ops.set_value_( + full_39, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_39 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__53 = paddle._C_ops.set_value_( + set_value__52, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__52 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__54 = paddle._C_ops.set_value_( + set_value__53, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__53 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__55 = paddle._C_ops.set_value_( + set_value__54, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__54 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__56 = paddle._C_ops.set_value_( + set_value__55, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__55 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__57 = paddle._C_ops.set_value_( + set_value__56, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__56 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__58 = paddle._C_ops.set_value_( + set_value__57, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__57 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__59 = paddle._C_ops.set_value_( + set_value__58, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__58 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__5 = paddle._C_ops.set_value_( + set_value__59, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__59 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_278 = paddle._C_ops.reshape(set_value__5, full_int_array_64) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_151 = paddle._C_ops.transpose(reshape_278, [0, 1, 3, 2, 4, 5]) + del reshape_278 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_279 = paddle._C_ops.reshape(transpose_151, full_int_array_36) + del transpose_151 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_280 = paddle._C_ops.reshape(reshape_279, full_int_array_37) + del reshape_279 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_52 = paddle._C_ops.unsqueeze(reshape_280, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_53 = paddle._C_ops.unsqueeze(reshape_280, full_int_array_2) + del reshape_280 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_5 = paddle._C_ops.subtract(unsqueeze_52, unsqueeze_53) + del unsqueeze_52, unsqueeze_53 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_5 = paddle._C_ops.not_equal(subtract_5, full_26) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_10 = paddle._C_ops.where(not_equal_5, full_35, subtract_5) + del not_equal_5, subtract_5 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_5 = paddle._C_ops.equal(where_10, full_26) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_11 = paddle._C_ops.where(equal_5, full_36, where_10) + del equal_5, where_10 + + # pd_op.matmul: (64x49x1536xf32) <- (64x49x512xf32, 512x1536xf32) + matmul_57 = paddle._C_ops.matmul(reshape_100, parameter_160, False, False) + del parameter_160 + + # pd_op.add: (64x49x1536xf32) <- (64x49x1536xf32, 1536xf32) + add_77 = paddle._C_ops.add(matmul_57, parameter_159) + del parameter_159 + + # pd_op.reshape: (64x49x3x16x32xf32) <- (64x49x1536xf32, 5xi64) + reshape_281 = paddle._C_ops.reshape(add_77, full_int_array_59) + + # pd_op.transpose: (3x64x16x49x32xf32) <- (64x49x3x16x32xf32) + transpose_68 = paddle._C_ops.transpose(reshape_281, [2, 0, 3, 1, 4]) + del reshape_281 + + # pd_op.slice: (64x16x49x32xf32) <- (3x64x16x49x32xf32, 1xi64, 1xi64) + slice_46 = paddle._C_ops.slice( + transpose_68, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (64x16x49x32xf32) <- (3x64x16x49x32xf32, 1xi64, 1xi64) + slice_47 = paddle._C_ops.slice( + transpose_68, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (64x16x49x32xf32) <- (3x64x16x49x32xf32, 1xi64, 1xi64) + slice_11 = paddle._C_ops.slice( + transpose_68, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (64x16x49x32xf32) <- (64x16x49x32xf32, 1xf32) + scale_11 = paddle._C_ops.scale(slice_46, full_0, float("0"), True) + del slice_46 + + # pd_op.transpose: (64x16x32x49xf32) <- (64x16x49x32xf32) + transpose_69 = paddle._C_ops.transpose(slice_47, [0, 1, 3, 2]) + del slice_47 + + # pd_op.matmul: (64x16x49x49xf32) <- (64x16x49x32xf32, 64x16x32x49xf32) + matmul_58 = paddle._C_ops.matmul(scale_11, transpose_69, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_101 = paddle._C_ops.reshape(data_41, full_int_array_15) + del data_41 + + # pd_op.index_select: (2401x16xf32) <- (169x16xf32, 2401xi64) + index_select_11 = paddle._C_ops.index_select(data_42, reshape_101, 0) + del data_42 + + # pd_op.reshape: (49x49x16xf32) <- (2401x16xf32, 3xi64) + reshape_282 = paddle._C_ops.reshape(index_select_11, full_int_array_16) + + # pd_op.transpose: (16x49x49xf32) <- (49x49x16xf32) + transpose_70 = paddle._C_ops.transpose(reshape_282, [2, 0, 1]) + del reshape_282 + + # pd_op.unsqueeze: (1x16x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_16 = paddle._C_ops.unsqueeze(transpose_70, full_int_array_0) + + # pd_op.add: (64x16x49x49xf32) <- (64x16x49x49xf32, 1x16x49x49xf32) + add_78 = paddle._C_ops.add(matmul_58, unsqueeze_16) + + # pd_op.reshape: (16x4x16x49x49xf32) <- (64x16x49x49xf32, 5xi64) + reshape_102 = paddle._C_ops.reshape(add_78, full_int_array_65) + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_54 = paddle._C_ops.unsqueeze(where_11, full_int_array_1) + del where_11 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_17 = paddle._C_ops.unsqueeze(unsqueeze_54, full_int_array_0) + del unsqueeze_54 + + # pd_op.add: (16x4x16x49x49xf32) <- (16x4x16x49x49xf32, 1x4x1x49x49xf32) + add_79 = paddle._C_ops.add(reshape_102, unsqueeze_17) + + # pd_op.reshape: (64x16x49x49xf32) <- (16x4x16x49x49xf32, 4xi64) + reshape_283 = paddle._C_ops.reshape(add_79, full_int_array_66) + + # pd_op.softmax: (64x16x49x49xf32) <- (64x16x49x49xf32) + softmax_11 = paddle._C_ops.softmax(reshape_283, -1) + del reshape_283 + + # pd_op.matmul: (64x16x49x32xf32) <- (64x16x49x49xf32, 64x16x49x32xf32) + matmul_135 = paddle._C_ops.matmul(softmax_11, slice_11, False, False) + + # pd_op.transpose: (64x49x16x32xf32) <- (64x16x49x32xf32) + transpose_71 = paddle._C_ops.transpose(matmul_135, [0, 2, 1, 3]) + del matmul_135 + + # pd_op.reshape: (64x49x512xf32) <- (64x49x16x32xf32, 3xi64) + reshape_103 = paddle._C_ops.reshape(transpose_71, full_int_array_60) + + # pd_op.matmul: (64x49x512xf32) <- (64x49x512xf32, 512x512xf32) + matmul_59 = paddle._C_ops.matmul(reshape_103, parameter_158, False, False) + del parameter_158 + + # pd_op.add: (64x49x512xf32) <- (64x49x512xf32, 512xf32) + add_80 = paddle._C_ops.add(matmul_59, parameter_157) + del parameter_157 + + # pd_op.reshape: (64x7x7x512xf32) <- (64x49x512xf32, 4xi64) + reshape_104 = paddle._C_ops.reshape(add_80, full_int_array_57) + + # pd_op.reshape: (16x2x2x7x7x512xf32) <- (64x7x7x512xf32, 6xi64) + reshape_284 = paddle._C_ops.reshape(reshape_104, full_int_array_61) + + # pd_op.transpose: (16x2x7x2x7x512xf32) <- (16x2x2x7x7x512xf32) + transpose_72 = paddle._C_ops.transpose(reshape_284, [0, 1, 3, 2, 4, 5]) + del reshape_284 + + # pd_op.reshape: (16x14x14x512xf32) <- (16x2x7x2x7x512xf32, 4xi64) + reshape_105 = paddle._C_ops.reshape(transpose_72, full_int_array_62) + + # pd_op.roll: (16x14x14x512xf32) <- (16x14x14x512xf32, 2xi64) + roll_11 = paddle._C_ops.roll(reshape_105, full_int_array_5, [1, 2]) + + # pd_op.reshape: (16x196x512xf32) <- (16x14x14x512xf32, 3xi64) + reshape_106 = paddle._C_ops.reshape(roll_11, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_12 = paddle._C_ops.full( + [], + float("0.76087"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_133 = full_12 + + # pd_op.uniform: (16x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_20 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (16x1x1xf32) <- (xf32, 16x1x1xf32) + add_196 = paddle._C_ops.add(full_12, uniform_20) + del uniform_20 + + # pd_op.floor: (16x1x1xf32) <- (16x1x1xf32) + floor_20 = paddle._C_ops.floor(add_196) + del add_196 + + # pd_op.divide: (16x196x512xf32) <- (16x196x512xf32, xf32) + divide_20 = paddle._C_ops.divide(reshape_106, full_12) + + # pd_op.multiply: (16x196x512xf32) <- (16x196x512xf32, 16x1x1xf32) + multiply_20 = paddle._C_ops.multiply(divide_20, floor_20) + + # pd_op.add: (16x196x512xf32) <- (16x196x512xf32, 16x196x512xf32) + add_81 = paddle._C_ops.add(add_76, multiply_20) + + # pd_op.layer_norm: (16x196x512xf32, 16x196xf32, 16x196xf32) <- (16x196x512xf32, 512xf32, 512xf32) + layer_norm_78, layer_norm_79, layer_norm_80 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_81, parameter_156, parameter_155, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_155, parameter_156 + + # pd_op.matmul: (16x196x2048xf32) <- (16x196x512xf32, 512x2048xf32) + matmul_60 = paddle._C_ops.matmul(layer_norm_78, parameter_154, False, False) + del parameter_154 + + # pd_op.add: (16x196x2048xf32) <- (16x196x2048xf32, 2048xf32) + add_82 = paddle._C_ops.add(matmul_60, parameter_153) + del parameter_153 + + # pd_op.gelu: (16x196x2048xf32) <- (16x196x2048xf32) + gelu_11 = paddle._C_ops.gelu(add_82, False) + + # pd_op.matmul: (16x196x512xf32) <- (16x196x2048xf32, 2048x512xf32) + matmul_61 = paddle._C_ops.matmul(gelu_11, parameter_152, False, False) + del parameter_152 + + # pd_op.add: (16x196x512xf32) <- (16x196x512xf32, 512xf32) + add_83 = paddle._C_ops.add(matmul_61, parameter_151) + del parameter_151 + + # pd_op.uniform: (16x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_21 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (16x1x1xf32) <- (xf32, 16x1x1xf32) + add_197 = paddle._C_ops.add(full_12, uniform_21) + del uniform_21 + + # pd_op.floor: (16x1x1xf32) <- (16x1x1xf32) + floor_21 = paddle._C_ops.floor(add_197) + del add_197 + + # pd_op.divide: (16x196x512xf32) <- (16x196x512xf32, xf32) + divide_21 = paddle._C_ops.divide(add_83, full_12) + + # pd_op.multiply: (16x196x512xf32) <- (16x196x512xf32, 16x1x1xf32) + multiply_21 = paddle._C_ops.multiply(divide_21, floor_21) + + # pd_op.add: (16x196x512xf32) <- (16x196x512xf32, 16x196x512xf32) + add_84 = paddle._C_ops.add(add_81, multiply_21) + + # pd_op.layer_norm: (16x196x512xf32, 16x196xf32, 16x196xf32) <- (16x196x512xf32, 512xf32, 512xf32) + layer_norm_81, layer_norm_82, layer_norm_83 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_84, parameter_150, parameter_149, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_149, parameter_150 + + # pd_op.reshape: (16x14x14x512xf32) <- (16x196x512xf32, 4xi64) + reshape_107 = paddle._C_ops.reshape(layer_norm_81, full_int_array_55) + + # pd_op.reshape: (16x2x7x2x7x512xf32) <- (16x14x14x512xf32, 6xi64) + reshape_285 = paddle._C_ops.reshape(reshape_107, full_int_array_56) + + # pd_op.transpose: (16x2x2x7x7x512xf32) <- (16x2x7x2x7x512xf32) + transpose_73 = paddle._C_ops.transpose(reshape_285, [0, 1, 3, 2, 4, 5]) + del reshape_285 + + # pd_op.reshape: (64x7x7x512xf32) <- (16x2x2x7x7x512xf32, 4xi64) + reshape_108 = paddle._C_ops.reshape(transpose_73, full_int_array_57) + + # pd_op.reshape: (64x49x512xf32) <- (64x7x7x512xf32, 3xi64) + reshape_109 = paddle._C_ops.reshape(reshape_108, full_int_array_58) + + # pd_op.matmul: (64x49x1536xf32) <- (64x49x512xf32, 512x1536xf32) + matmul_62 = paddle._C_ops.matmul(reshape_109, parameter_148, False, False) + del parameter_148 + + # pd_op.add: (64x49x1536xf32) <- (64x49x1536xf32, 1536xf32) + add_85 = paddle._C_ops.add(matmul_62, parameter_147) + del parameter_147 + + # pd_op.reshape: (64x49x3x16x32xf32) <- (64x49x1536xf32, 5xi64) + reshape_286 = paddle._C_ops.reshape(add_85, full_int_array_59) + + # pd_op.transpose: (3x64x16x49x32xf32) <- (64x49x3x16x32xf32) + transpose_74 = paddle._C_ops.transpose(reshape_286, [2, 0, 3, 1, 4]) + del reshape_286 + + # pd_op.slice: (64x16x49x32xf32) <- (3x64x16x49x32xf32, 1xi64, 1xi64) + slice_48 = paddle._C_ops.slice( + transpose_74, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (64x16x49x32xf32) <- (3x64x16x49x32xf32, 1xi64, 1xi64) + slice_49 = paddle._C_ops.slice( + transpose_74, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (64x16x49x32xf32) <- (3x64x16x49x32xf32, 1xi64, 1xi64) + slice_12 = paddle._C_ops.slice( + transpose_74, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (64x16x49x32xf32) <- (64x16x49x32xf32, 1xf32) + scale_12 = paddle._C_ops.scale(slice_48, full_0, float("0"), True) + del slice_48 + + # pd_op.transpose: (64x16x32x49xf32) <- (64x16x49x32xf32) + transpose_75 = paddle._C_ops.transpose(slice_49, [0, 1, 3, 2]) + del slice_49 + + # pd_op.matmul: (64x16x49x49xf32) <- (64x16x49x32xf32, 64x16x32x49xf32) + matmul_63 = paddle._C_ops.matmul(scale_12, transpose_75, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_110 = paddle._C_ops.reshape(data_44, full_int_array_15) + del data_44 + + # pd_op.index_select: (2401x16xf32) <- (169x16xf32, 2401xi64) + index_select_12 = paddle._C_ops.index_select(data_45, reshape_110, 0) + del data_45 + + # pd_op.reshape: (49x49x16xf32) <- (2401x16xf32, 3xi64) + reshape_287 = paddle._C_ops.reshape(index_select_12, full_int_array_16) + + # pd_op.transpose: (16x49x49xf32) <- (49x49x16xf32) + transpose_76 = paddle._C_ops.transpose(reshape_287, [2, 0, 1]) + del reshape_287 + + # pd_op.unsqueeze: (1x16x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_18 = paddle._C_ops.unsqueeze(transpose_76, full_int_array_0) + + # pd_op.add: (64x16x49x49xf32) <- (64x16x49x49xf32, 1x16x49x49xf32) + add_198 = paddle._C_ops.add(matmul_63, unsqueeze_18) + + # pd_op.softmax: (64x16x49x49xf32) <- (64x16x49x49xf32) + softmax_12 = paddle._C_ops.softmax(add_198, -1) + del add_198 + + # pd_op.matmul: (64x16x49x32xf32) <- (64x16x49x49xf32, 64x16x49x32xf32) + matmul_136 = paddle._C_ops.matmul(softmax_12, slice_12, False, False) + + # pd_op.transpose: (64x49x16x32xf32) <- (64x16x49x32xf32) + transpose_77 = paddle._C_ops.transpose(matmul_136, [0, 2, 1, 3]) + del matmul_136 + + # pd_op.reshape: (64x49x512xf32) <- (64x49x16x32xf32, 3xi64) + reshape_111 = paddle._C_ops.reshape(transpose_77, full_int_array_60) + + # pd_op.matmul: (64x49x512xf32) <- (64x49x512xf32, 512x512xf32) + matmul_64 = paddle._C_ops.matmul(reshape_111, parameter_146, False, False) + del parameter_146 + + # pd_op.add: (64x49x512xf32) <- (64x49x512xf32, 512xf32) + add_86 = paddle._C_ops.add(matmul_64, parameter_145) + del parameter_145 + + # pd_op.reshape: (64x7x7x512xf32) <- (64x49x512xf32, 4xi64) + reshape_112 = paddle._C_ops.reshape(add_86, full_int_array_57) + + # pd_op.reshape: (16x2x2x7x7x512xf32) <- (64x7x7x512xf32, 6xi64) + reshape_288 = paddle._C_ops.reshape(reshape_112, full_int_array_61) + + # pd_op.transpose: (16x2x7x2x7x512xf32) <- (16x2x2x7x7x512xf32) + transpose_78 = paddle._C_ops.transpose(reshape_288, [0, 1, 3, 2, 4, 5]) + del reshape_288 + + # pd_op.reshape: (16x14x14x512xf32) <- (16x2x7x2x7x512xf32, 4xi64) + reshape_113 = paddle._C_ops.reshape(transpose_78, full_int_array_62) + + # pd_op.reshape: (16x196x512xf32) <- (16x14x14x512xf32, 3xi64) + reshape_114 = paddle._C_ops.reshape(reshape_113, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_13 = paddle._C_ops.full( + [], + float("0.73913"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_142 = full_13 + + # pd_op.uniform: (16x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_22 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (16x1x1xf32) <- (xf32, 16x1x1xf32) + add_199 = paddle._C_ops.add(full_13, uniform_22) + del uniform_22 + + # pd_op.floor: (16x1x1xf32) <- (16x1x1xf32) + floor_22 = paddle._C_ops.floor(add_199) + del add_199 + + # pd_op.divide: (16x196x512xf32) <- (16x196x512xf32, xf32) + divide_22 = paddle._C_ops.divide(reshape_114, full_13) + + # pd_op.multiply: (16x196x512xf32) <- (16x196x512xf32, 16x1x1xf32) + multiply_22 = paddle._C_ops.multiply(divide_22, floor_22) + + # pd_op.add: (16x196x512xf32) <- (16x196x512xf32, 16x196x512xf32) + add_87 = paddle._C_ops.add(add_84, multiply_22) + + # pd_op.layer_norm: (16x196x512xf32, 16x196xf32, 16x196xf32) <- (16x196x512xf32, 512xf32, 512xf32) + layer_norm_84, layer_norm_85, layer_norm_86 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_87, parameter_144, parameter_143, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_143, parameter_144 + + # pd_op.matmul: (16x196x2048xf32) <- (16x196x512xf32, 512x2048xf32) + matmul_65 = paddle._C_ops.matmul(layer_norm_84, parameter_142, False, False) + del parameter_142 + + # pd_op.add: (16x196x2048xf32) <- (16x196x2048xf32, 2048xf32) + add_88 = paddle._C_ops.add(matmul_65, parameter_141) + del parameter_141 + + # pd_op.gelu: (16x196x2048xf32) <- (16x196x2048xf32) + gelu_12 = paddle._C_ops.gelu(add_88, False) + + # pd_op.matmul: (16x196x512xf32) <- (16x196x2048xf32, 2048x512xf32) + matmul_66 = paddle._C_ops.matmul(gelu_12, parameter_140, False, False) + del parameter_140 + + # pd_op.add: (16x196x512xf32) <- (16x196x512xf32, 512xf32) + add_89 = paddle._C_ops.add(matmul_66, parameter_139) + del parameter_139 + + # pd_op.uniform: (16x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_23 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (16x1x1xf32) <- (xf32, 16x1x1xf32) + add_200 = paddle._C_ops.add(full_13, uniform_23) + del uniform_23 + + # pd_op.floor: (16x1x1xf32) <- (16x1x1xf32) + floor_23 = paddle._C_ops.floor(add_200) + del add_200 + + # pd_op.divide: (16x196x512xf32) <- (16x196x512xf32, xf32) + divide_23 = paddle._C_ops.divide(add_89, full_13) + + # pd_op.multiply: (16x196x512xf32) <- (16x196x512xf32, 16x1x1xf32) + multiply_23 = paddle._C_ops.multiply(divide_23, floor_23) + + # pd_op.add: (16x196x512xf32) <- (16x196x512xf32, 16x196x512xf32) + add_90 = paddle._C_ops.add(add_87, multiply_23) + + # pd_op.layer_norm: (16x196x512xf32, 16x196xf32, 16x196xf32) <- (16x196x512xf32, 512xf32, 512xf32) + layer_norm_87, layer_norm_88, layer_norm_89 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_90, parameter_138, parameter_137, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_137, parameter_138 + + # pd_op.reshape: (16x14x14x512xf32) <- (16x196x512xf32, 4xi64) + reshape_115 = paddle._C_ops.reshape(layer_norm_87, full_int_array_55) + + # pd_op.roll: (16x14x14x512xf32) <- (16x14x14x512xf32, 2xi64) + roll_12 = paddle._C_ops.roll(reshape_115, full_int_array_4, [1, 2]) + + # pd_op.reshape: (16x2x7x2x7x512xf32) <- (16x14x14x512xf32, 6xi64) + reshape_289 = paddle._C_ops.reshape(roll_12, full_int_array_56) + + # pd_op.transpose: (16x2x2x7x7x512xf32) <- (16x2x7x2x7x512xf32) + transpose_79 = paddle._C_ops.transpose(reshape_289, [0, 1, 3, 2, 4, 5]) + del reshape_289 + + # pd_op.reshape: (64x7x7x512xf32) <- (16x2x2x7x7x512xf32, 4xi64) + reshape_116 = paddle._C_ops.reshape(transpose_79, full_int_array_57) + + # pd_op.reshape: (64x49x512xf32) <- (64x7x7x512xf32, 3xi64) + reshape_117 = paddle._C_ops.reshape(reshape_116, full_int_array_58) + + # pd_op.full: (1x14x14x1xf32) <- () + full_40 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__60 = paddle._C_ops.set_value_( + full_40, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_40 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__61 = paddle._C_ops.set_value_( + set_value__60, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__60 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__62 = paddle._C_ops.set_value_( + set_value__61, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__61 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__63 = paddle._C_ops.set_value_( + set_value__62, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__62 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__64 = paddle._C_ops.set_value_( + set_value__63, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__63 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__65 = paddle._C_ops.set_value_( + set_value__64, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__64 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__66 = paddle._C_ops.set_value_( + set_value__65, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__65 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__67 = paddle._C_ops.set_value_( + set_value__66, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__66 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__6 = paddle._C_ops.set_value_( + set_value__67, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__67 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_290 = paddle._C_ops.reshape(set_value__6, full_int_array_64) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_152 = paddle._C_ops.transpose(reshape_290, [0, 1, 3, 2, 4, 5]) + del reshape_290 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_291 = paddle._C_ops.reshape(transpose_152, full_int_array_36) + del transpose_152 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_292 = paddle._C_ops.reshape(reshape_291, full_int_array_37) + del reshape_291 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_55 = paddle._C_ops.unsqueeze(reshape_292, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_56 = paddle._C_ops.unsqueeze(reshape_292, full_int_array_2) + del reshape_292 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_6 = paddle._C_ops.subtract(unsqueeze_55, unsqueeze_56) + del unsqueeze_55, unsqueeze_56 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_6 = paddle._C_ops.not_equal(subtract_6, full_26) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_12 = paddle._C_ops.where(not_equal_6, full_35, subtract_6) + del not_equal_6, subtract_6 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_6 = paddle._C_ops.equal(where_12, full_26) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_13 = paddle._C_ops.where(equal_6, full_36, where_12) + del equal_6, where_12 + + # pd_op.matmul: (64x49x1536xf32) <- (64x49x512xf32, 512x1536xf32) + matmul_67 = paddle._C_ops.matmul(reshape_117, parameter_136, False, False) + del parameter_136 + + # pd_op.add: (64x49x1536xf32) <- (64x49x1536xf32, 1536xf32) + add_91 = paddle._C_ops.add(matmul_67, parameter_135) + del parameter_135 + + # pd_op.reshape: (64x49x3x16x32xf32) <- (64x49x1536xf32, 5xi64) + reshape_293 = paddle._C_ops.reshape(add_91, full_int_array_59) + + # pd_op.transpose: (3x64x16x49x32xf32) <- (64x49x3x16x32xf32) + transpose_80 = paddle._C_ops.transpose(reshape_293, [2, 0, 3, 1, 4]) + del reshape_293 + + # pd_op.slice: (64x16x49x32xf32) <- (3x64x16x49x32xf32, 1xi64, 1xi64) + slice_50 = paddle._C_ops.slice( + transpose_80, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (64x16x49x32xf32) <- (3x64x16x49x32xf32, 1xi64, 1xi64) + slice_51 = paddle._C_ops.slice( + transpose_80, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (64x16x49x32xf32) <- (3x64x16x49x32xf32, 1xi64, 1xi64) + slice_13 = paddle._C_ops.slice( + transpose_80, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (64x16x49x32xf32) <- (64x16x49x32xf32, 1xf32) + scale_13 = paddle._C_ops.scale(slice_50, full_0, float("0"), True) + del slice_50 + + # pd_op.transpose: (64x16x32x49xf32) <- (64x16x49x32xf32) + transpose_81 = paddle._C_ops.transpose(slice_51, [0, 1, 3, 2]) + del slice_51 + + # pd_op.matmul: (64x16x49x49xf32) <- (64x16x49x32xf32, 64x16x32x49xf32) + matmul_68 = paddle._C_ops.matmul(scale_13, transpose_81, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_118 = paddle._C_ops.reshape(data_47, full_int_array_15) + del data_47 + + # pd_op.index_select: (2401x16xf32) <- (169x16xf32, 2401xi64) + index_select_13 = paddle._C_ops.index_select(data_48, reshape_118, 0) + del data_48 + + # pd_op.reshape: (49x49x16xf32) <- (2401x16xf32, 3xi64) + reshape_294 = paddle._C_ops.reshape(index_select_13, full_int_array_16) + + # pd_op.transpose: (16x49x49xf32) <- (49x49x16xf32) + transpose_82 = paddle._C_ops.transpose(reshape_294, [2, 0, 1]) + del reshape_294 + + # pd_op.unsqueeze: (1x16x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_19 = paddle._C_ops.unsqueeze(transpose_82, full_int_array_0) + + # pd_op.add: (64x16x49x49xf32) <- (64x16x49x49xf32, 1x16x49x49xf32) + add_92 = paddle._C_ops.add(matmul_68, unsqueeze_19) + + # pd_op.reshape: (16x4x16x49x49xf32) <- (64x16x49x49xf32, 5xi64) + reshape_119 = paddle._C_ops.reshape(add_92, full_int_array_65) + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_57 = paddle._C_ops.unsqueeze(where_13, full_int_array_1) + del where_13 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_20 = paddle._C_ops.unsqueeze(unsqueeze_57, full_int_array_0) + del unsqueeze_57 + + # pd_op.add: (16x4x16x49x49xf32) <- (16x4x16x49x49xf32, 1x4x1x49x49xf32) + add_93 = paddle._C_ops.add(reshape_119, unsqueeze_20) + + # pd_op.reshape: (64x16x49x49xf32) <- (16x4x16x49x49xf32, 4xi64) + reshape_295 = paddle._C_ops.reshape(add_93, full_int_array_66) + + # pd_op.softmax: (64x16x49x49xf32) <- (64x16x49x49xf32) + softmax_13 = paddle._C_ops.softmax(reshape_295, -1) + del reshape_295 + + # pd_op.matmul: (64x16x49x32xf32) <- (64x16x49x49xf32, 64x16x49x32xf32) + matmul_137 = paddle._C_ops.matmul(softmax_13, slice_13, False, False) + + # pd_op.transpose: (64x49x16x32xf32) <- (64x16x49x32xf32) + transpose_83 = paddle._C_ops.transpose(matmul_137, [0, 2, 1, 3]) + del matmul_137 + + # pd_op.reshape: (64x49x512xf32) <- (64x49x16x32xf32, 3xi64) + reshape_120 = paddle._C_ops.reshape(transpose_83, full_int_array_60) + + # pd_op.matmul: (64x49x512xf32) <- (64x49x512xf32, 512x512xf32) + matmul_69 = paddle._C_ops.matmul(reshape_120, parameter_134, False, False) + del parameter_134 + + # pd_op.add: (64x49x512xf32) <- (64x49x512xf32, 512xf32) + add_94 = paddle._C_ops.add(matmul_69, parameter_133) + del parameter_133 + + # pd_op.reshape: (64x7x7x512xf32) <- (64x49x512xf32, 4xi64) + reshape_121 = paddle._C_ops.reshape(add_94, full_int_array_57) + + # pd_op.reshape: (16x2x2x7x7x512xf32) <- (64x7x7x512xf32, 6xi64) + reshape_296 = paddle._C_ops.reshape(reshape_121, full_int_array_61) + + # pd_op.transpose: (16x2x7x2x7x512xf32) <- (16x2x2x7x7x512xf32) + transpose_84 = paddle._C_ops.transpose(reshape_296, [0, 1, 3, 2, 4, 5]) + del reshape_296 + + # pd_op.reshape: (16x14x14x512xf32) <- (16x2x7x2x7x512xf32, 4xi64) + reshape_122 = paddle._C_ops.reshape(transpose_84, full_int_array_62) + + # pd_op.roll: (16x14x14x512xf32) <- (16x14x14x512xf32, 2xi64) + roll_13 = paddle._C_ops.roll(reshape_122, full_int_array_5, [1, 2]) + + # pd_op.reshape: (16x196x512xf32) <- (16x14x14x512xf32, 3xi64) + reshape_123 = paddle._C_ops.reshape(roll_13, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_14 = paddle._C_ops.full( + [], + float("0.717391"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_153 = full_14 + + # pd_op.uniform: (16x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_24 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (16x1x1xf32) <- (xf32, 16x1x1xf32) + add_201 = paddle._C_ops.add(full_14, uniform_24) + del uniform_24 + + # pd_op.floor: (16x1x1xf32) <- (16x1x1xf32) + floor_24 = paddle._C_ops.floor(add_201) + del add_201 + + # pd_op.divide: (16x196x512xf32) <- (16x196x512xf32, xf32) + divide_24 = paddle._C_ops.divide(reshape_123, full_14) + + # pd_op.multiply: (16x196x512xf32) <- (16x196x512xf32, 16x1x1xf32) + multiply_24 = paddle._C_ops.multiply(divide_24, floor_24) + + # pd_op.add: (16x196x512xf32) <- (16x196x512xf32, 16x196x512xf32) + add_95 = paddle._C_ops.add(add_90, multiply_24) + + # pd_op.layer_norm: (16x196x512xf32, 16x196xf32, 16x196xf32) <- (16x196x512xf32, 512xf32, 512xf32) + layer_norm_90, layer_norm_91, layer_norm_92 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_95, parameter_132, parameter_131, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_131, parameter_132 + + # pd_op.matmul: (16x196x2048xf32) <- (16x196x512xf32, 512x2048xf32) + matmul_70 = paddle._C_ops.matmul(layer_norm_90, parameter_130, False, False) + del parameter_130 + + # pd_op.add: (16x196x2048xf32) <- (16x196x2048xf32, 2048xf32) + add_96 = paddle._C_ops.add(matmul_70, parameter_129) + del parameter_129 + + # pd_op.gelu: (16x196x2048xf32) <- (16x196x2048xf32) + gelu_13 = paddle._C_ops.gelu(add_96, False) + + # pd_op.matmul: (16x196x512xf32) <- (16x196x2048xf32, 2048x512xf32) + matmul_71 = paddle._C_ops.matmul(gelu_13, parameter_128, False, False) + del parameter_128 + + # pd_op.add: (16x196x512xf32) <- (16x196x512xf32, 512xf32) + add_97 = paddle._C_ops.add(matmul_71, parameter_127) + del parameter_127 + + # pd_op.uniform: (16x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_25 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (16x1x1xf32) <- (xf32, 16x1x1xf32) + add_202 = paddle._C_ops.add(full_14, uniform_25) + del uniform_25 + + # pd_op.floor: (16x1x1xf32) <- (16x1x1xf32) + floor_25 = paddle._C_ops.floor(add_202) + del add_202 + + # pd_op.divide: (16x196x512xf32) <- (16x196x512xf32, xf32) + divide_25 = paddle._C_ops.divide(add_97, full_14) + + # pd_op.multiply: (16x196x512xf32) <- (16x196x512xf32, 16x1x1xf32) + multiply_25 = paddle._C_ops.multiply(divide_25, floor_25) + + # pd_op.add: (16x196x512xf32) <- (16x196x512xf32, 16x196x512xf32) + add_98 = paddle._C_ops.add(add_95, multiply_25) + + # pd_op.layer_norm: (16x196x512xf32, 16x196xf32, 16x196xf32) <- (16x196x512xf32, 512xf32, 512xf32) + layer_norm_93, layer_norm_94, layer_norm_95 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_98, parameter_126, parameter_125, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_125, parameter_126 + + # pd_op.reshape: (16x14x14x512xf32) <- (16x196x512xf32, 4xi64) + reshape_124 = paddle._C_ops.reshape(layer_norm_93, full_int_array_55) + + # pd_op.reshape: (16x2x7x2x7x512xf32) <- (16x14x14x512xf32, 6xi64) + reshape_297 = paddle._C_ops.reshape(reshape_124, full_int_array_56) + + # pd_op.transpose: (16x2x2x7x7x512xf32) <- (16x2x7x2x7x512xf32) + transpose_85 = paddle._C_ops.transpose(reshape_297, [0, 1, 3, 2, 4, 5]) + del reshape_297 + + # pd_op.reshape: (64x7x7x512xf32) <- (16x2x2x7x7x512xf32, 4xi64) + reshape_125 = paddle._C_ops.reshape(transpose_85, full_int_array_57) + + # pd_op.reshape: (64x49x512xf32) <- (64x7x7x512xf32, 3xi64) + reshape_126 = paddle._C_ops.reshape(reshape_125, full_int_array_58) + + # pd_op.matmul: (64x49x1536xf32) <- (64x49x512xf32, 512x1536xf32) + matmul_72 = paddle._C_ops.matmul(reshape_126, parameter_124, False, False) + del parameter_124 + + # pd_op.add: (64x49x1536xf32) <- (64x49x1536xf32, 1536xf32) + add_99 = paddle._C_ops.add(matmul_72, parameter_123) + del parameter_123 + + # pd_op.reshape: (64x49x3x16x32xf32) <- (64x49x1536xf32, 5xi64) + reshape_298 = paddle._C_ops.reshape(add_99, full_int_array_59) + + # pd_op.transpose: (3x64x16x49x32xf32) <- (64x49x3x16x32xf32) + transpose_86 = paddle._C_ops.transpose(reshape_298, [2, 0, 3, 1, 4]) + del reshape_298 + + # pd_op.slice: (64x16x49x32xf32) <- (3x64x16x49x32xf32, 1xi64, 1xi64) + slice_52 = paddle._C_ops.slice( + transpose_86, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (64x16x49x32xf32) <- (3x64x16x49x32xf32, 1xi64, 1xi64) + slice_53 = paddle._C_ops.slice( + transpose_86, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (64x16x49x32xf32) <- (3x64x16x49x32xf32, 1xi64, 1xi64) + slice_14 = paddle._C_ops.slice( + transpose_86, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (64x16x49x32xf32) <- (64x16x49x32xf32, 1xf32) + scale_14 = paddle._C_ops.scale(slice_52, full_0, float("0"), True) + del slice_52 + + # pd_op.transpose: (64x16x32x49xf32) <- (64x16x49x32xf32) + transpose_87 = paddle._C_ops.transpose(slice_53, [0, 1, 3, 2]) + del slice_53 + + # pd_op.matmul: (64x16x49x49xf32) <- (64x16x49x32xf32, 64x16x32x49xf32) + matmul_73 = paddle._C_ops.matmul(scale_14, transpose_87, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_127 = paddle._C_ops.reshape(data_1, full_int_array_15) + del data_1 + + # pd_op.index_select: (2401x16xf32) <- (169x16xf32, 2401xi64) + index_select_14 = paddle._C_ops.index_select(data_2, reshape_127, 0) + del data_2 + + # pd_op.reshape: (49x49x16xf32) <- (2401x16xf32, 3xi64) + reshape_299 = paddle._C_ops.reshape(index_select_14, full_int_array_16) + + # pd_op.transpose: (16x49x49xf32) <- (49x49x16xf32) + transpose_88 = paddle._C_ops.transpose(reshape_299, [2, 0, 1]) + del reshape_299 + + # pd_op.unsqueeze: (1x16x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_21 = paddle._C_ops.unsqueeze(transpose_88, full_int_array_0) + + # pd_op.add: (64x16x49x49xf32) <- (64x16x49x49xf32, 1x16x49x49xf32) + add_203 = paddle._C_ops.add(matmul_73, unsqueeze_21) + + # pd_op.softmax: (64x16x49x49xf32) <- (64x16x49x49xf32) + softmax_14 = paddle._C_ops.softmax(add_203, -1) + del add_203 + + # pd_op.matmul: (64x16x49x32xf32) <- (64x16x49x49xf32, 64x16x49x32xf32) + matmul_138 = paddle._C_ops.matmul(softmax_14, slice_14, False, False) + + # pd_op.transpose: (64x49x16x32xf32) <- (64x16x49x32xf32) + transpose_89 = paddle._C_ops.transpose(matmul_138, [0, 2, 1, 3]) + del matmul_138 + + # pd_op.reshape: (64x49x512xf32) <- (64x49x16x32xf32, 3xi64) + reshape_128 = paddle._C_ops.reshape(transpose_89, full_int_array_60) + + # pd_op.matmul: (64x49x512xf32) <- (64x49x512xf32, 512x512xf32) + matmul_74 = paddle._C_ops.matmul(reshape_128, parameter_122, False, False) + del parameter_122 + + # pd_op.add: (64x49x512xf32) <- (64x49x512xf32, 512xf32) + add_100 = paddle._C_ops.add(matmul_74, parameter_121) + del parameter_121 + + # pd_op.reshape: (64x7x7x512xf32) <- (64x49x512xf32, 4xi64) + reshape_129 = paddle._C_ops.reshape(add_100, full_int_array_57) + + # pd_op.reshape: (16x2x2x7x7x512xf32) <- (64x7x7x512xf32, 6xi64) + reshape_300 = paddle._C_ops.reshape(reshape_129, full_int_array_61) + + # pd_op.transpose: (16x2x7x2x7x512xf32) <- (16x2x2x7x7x512xf32) + transpose_90 = paddle._C_ops.transpose(reshape_300, [0, 1, 3, 2, 4, 5]) + del reshape_300 + + # pd_op.reshape: (16x14x14x512xf32) <- (16x2x7x2x7x512xf32, 4xi64) + reshape_130 = paddle._C_ops.reshape(transpose_90, full_int_array_62) + + # pd_op.reshape: (16x196x512xf32) <- (16x14x14x512xf32, 3xi64) + reshape_131 = paddle._C_ops.reshape(reshape_130, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_15 = paddle._C_ops.full( + [], + float("0.695652"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_162 = full_15 + + # pd_op.uniform: (16x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_26 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (16x1x1xf32) <- (xf32, 16x1x1xf32) + add_204 = paddle._C_ops.add(full_15, uniform_26) + del uniform_26 + + # pd_op.floor: (16x1x1xf32) <- (16x1x1xf32) + floor_26 = paddle._C_ops.floor(add_204) + del add_204 + + # pd_op.divide: (16x196x512xf32) <- (16x196x512xf32, xf32) + divide_26 = paddle._C_ops.divide(reshape_131, full_15) + + # pd_op.multiply: (16x196x512xf32) <- (16x196x512xf32, 16x1x1xf32) + multiply_26 = paddle._C_ops.multiply(divide_26, floor_26) + + # pd_op.add: (16x196x512xf32) <- (16x196x512xf32, 16x196x512xf32) + add_101 = paddle._C_ops.add(add_98, multiply_26) + + # pd_op.layer_norm: (16x196x512xf32, 16x196xf32, 16x196xf32) <- (16x196x512xf32, 512xf32, 512xf32) + layer_norm_96, layer_norm_97, layer_norm_98 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_101, parameter_120, parameter_119, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_119, parameter_120 + + # pd_op.matmul: (16x196x2048xf32) <- (16x196x512xf32, 512x2048xf32) + matmul_75 = paddle._C_ops.matmul(layer_norm_96, parameter_118, False, False) + del parameter_118 + + # pd_op.add: (16x196x2048xf32) <- (16x196x2048xf32, 2048xf32) + add_102 = paddle._C_ops.add(matmul_75, parameter_117) + del parameter_117 + + # pd_op.gelu: (16x196x2048xf32) <- (16x196x2048xf32) + gelu_14 = paddle._C_ops.gelu(add_102, False) + + # pd_op.matmul: (16x196x512xf32) <- (16x196x2048xf32, 2048x512xf32) + matmul_76 = paddle._C_ops.matmul(gelu_14, parameter_116, False, False) + del parameter_116 + + # pd_op.add: (16x196x512xf32) <- (16x196x512xf32, 512xf32) + add_103 = paddle._C_ops.add(matmul_76, parameter_115) + del parameter_115 + + # pd_op.uniform: (16x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_27 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (16x1x1xf32) <- (xf32, 16x1x1xf32) + add_205 = paddle._C_ops.add(full_15, uniform_27) + del uniform_27 + + # pd_op.floor: (16x1x1xf32) <- (16x1x1xf32) + floor_27 = paddle._C_ops.floor(add_205) + del add_205 + + # pd_op.divide: (16x196x512xf32) <- (16x196x512xf32, xf32) + divide_27 = paddle._C_ops.divide(add_103, full_15) + + # pd_op.multiply: (16x196x512xf32) <- (16x196x512xf32, 16x1x1xf32) + multiply_27 = paddle._C_ops.multiply(divide_27, floor_27) + + # pd_op.add: (16x196x512xf32) <- (16x196x512xf32, 16x196x512xf32) + add_104 = paddle._C_ops.add(add_101, multiply_27) + + # pd_op.layer_norm: (16x196x512xf32, 16x196xf32, 16x196xf32) <- (16x196x512xf32, 512xf32, 512xf32) + layer_norm_99, layer_norm_100, layer_norm_101 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_104, parameter_114, parameter_113, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_113, parameter_114 + + # pd_op.reshape: (16x14x14x512xf32) <- (16x196x512xf32, 4xi64) + reshape_132 = paddle._C_ops.reshape(layer_norm_99, full_int_array_55) + + # pd_op.roll: (16x14x14x512xf32) <- (16x14x14x512xf32, 2xi64) + roll_14 = paddle._C_ops.roll(reshape_132, full_int_array_4, [1, 2]) + + # pd_op.reshape: (16x2x7x2x7x512xf32) <- (16x14x14x512xf32, 6xi64) + reshape_301 = paddle._C_ops.reshape(roll_14, full_int_array_56) + + # pd_op.transpose: (16x2x2x7x7x512xf32) <- (16x2x7x2x7x512xf32) + transpose_91 = paddle._C_ops.transpose(reshape_301, [0, 1, 3, 2, 4, 5]) + del reshape_301 + + # pd_op.reshape: (64x7x7x512xf32) <- (16x2x2x7x7x512xf32, 4xi64) + reshape_133 = paddle._C_ops.reshape(transpose_91, full_int_array_57) + + # pd_op.reshape: (64x49x512xf32) <- (64x7x7x512xf32, 3xi64) + reshape_134 = paddle._C_ops.reshape(reshape_133, full_int_array_58) + + # pd_op.full: (1x14x14x1xf32) <- () + full_41 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__68 = paddle._C_ops.set_value_( + full_41, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_41 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__69 = paddle._C_ops.set_value_( + set_value__68, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__68 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__70 = paddle._C_ops.set_value_( + set_value__69, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__69 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__71 = paddle._C_ops.set_value_( + set_value__70, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__70 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__72 = paddle._C_ops.set_value_( + set_value__71, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__71 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__73 = paddle._C_ops.set_value_( + set_value__72, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__72 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__74 = paddle._C_ops.set_value_( + set_value__73, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__73 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__75 = paddle._C_ops.set_value_( + set_value__74, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__74 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__7 = paddle._C_ops.set_value_( + set_value__75, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__75 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_302 = paddle._C_ops.reshape(set_value__7, full_int_array_64) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_153 = paddle._C_ops.transpose(reshape_302, [0, 1, 3, 2, 4, 5]) + del reshape_302 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_303 = paddle._C_ops.reshape(transpose_153, full_int_array_36) + del transpose_153 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_304 = paddle._C_ops.reshape(reshape_303, full_int_array_37) + del reshape_303 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_58 = paddle._C_ops.unsqueeze(reshape_304, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_59 = paddle._C_ops.unsqueeze(reshape_304, full_int_array_2) + del reshape_304 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_7 = paddle._C_ops.subtract(unsqueeze_58, unsqueeze_59) + del unsqueeze_58, unsqueeze_59 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_7 = paddle._C_ops.not_equal(subtract_7, full_26) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_14 = paddle._C_ops.where(not_equal_7, full_35, subtract_7) + del not_equal_7, subtract_7 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_7 = paddle._C_ops.equal(where_14, full_26) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_15 = paddle._C_ops.where(equal_7, full_36, where_14) + del equal_7, where_14 + + # pd_op.matmul: (64x49x1536xf32) <- (64x49x512xf32, 512x1536xf32) + matmul_77 = paddle._C_ops.matmul(reshape_134, parameter_112, False, False) + del parameter_112 + + # pd_op.add: (64x49x1536xf32) <- (64x49x1536xf32, 1536xf32) + add_105 = paddle._C_ops.add(matmul_77, parameter_111) + del parameter_111 + + # pd_op.reshape: (64x49x3x16x32xf32) <- (64x49x1536xf32, 5xi64) + reshape_305 = paddle._C_ops.reshape(add_105, full_int_array_59) + + # pd_op.transpose: (3x64x16x49x32xf32) <- (64x49x3x16x32xf32) + transpose_92 = paddle._C_ops.transpose(reshape_305, [2, 0, 3, 1, 4]) + del reshape_305 + + # pd_op.slice: (64x16x49x32xf32) <- (3x64x16x49x32xf32, 1xi64, 1xi64) + slice_54 = paddle._C_ops.slice( + transpose_92, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (64x16x49x32xf32) <- (3x64x16x49x32xf32, 1xi64, 1xi64) + slice_55 = paddle._C_ops.slice( + transpose_92, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (64x16x49x32xf32) <- (3x64x16x49x32xf32, 1xi64, 1xi64) + slice_15 = paddle._C_ops.slice( + transpose_92, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (64x16x49x32xf32) <- (64x16x49x32xf32, 1xf32) + scale_15 = paddle._C_ops.scale(slice_54, full_0, float("0"), True) + del slice_54 + + # pd_op.transpose: (64x16x32x49xf32) <- (64x16x49x32xf32) + transpose_93 = paddle._C_ops.transpose(slice_55, [0, 1, 3, 2]) + del slice_55 + + # pd_op.matmul: (64x16x49x49xf32) <- (64x16x49x32xf32, 64x16x32x49xf32) + matmul_78 = paddle._C_ops.matmul(scale_15, transpose_93, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_135 = paddle._C_ops.reshape(data_3, full_int_array_15) + del data_3 + + # pd_op.index_select: (2401x16xf32) <- (169x16xf32, 2401xi64) + index_select_15 = paddle._C_ops.index_select(data_4, reshape_135, 0) + del data_4 + + # pd_op.reshape: (49x49x16xf32) <- (2401x16xf32, 3xi64) + reshape_306 = paddle._C_ops.reshape(index_select_15, full_int_array_16) + + # pd_op.transpose: (16x49x49xf32) <- (49x49x16xf32) + transpose_94 = paddle._C_ops.transpose(reshape_306, [2, 0, 1]) + del reshape_306 + + # pd_op.unsqueeze: (1x16x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_22 = paddle._C_ops.unsqueeze(transpose_94, full_int_array_0) + + # pd_op.add: (64x16x49x49xf32) <- (64x16x49x49xf32, 1x16x49x49xf32) + add_106 = paddle._C_ops.add(matmul_78, unsqueeze_22) + + # pd_op.reshape: (16x4x16x49x49xf32) <- (64x16x49x49xf32, 5xi64) + reshape_136 = paddle._C_ops.reshape(add_106, full_int_array_65) + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_60 = paddle._C_ops.unsqueeze(where_15, full_int_array_1) + del where_15 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_23 = paddle._C_ops.unsqueeze(unsqueeze_60, full_int_array_0) + del unsqueeze_60 + + # pd_op.add: (16x4x16x49x49xf32) <- (16x4x16x49x49xf32, 1x4x1x49x49xf32) + add_107 = paddle._C_ops.add(reshape_136, unsqueeze_23) + + # pd_op.reshape: (64x16x49x49xf32) <- (16x4x16x49x49xf32, 4xi64) + reshape_307 = paddle._C_ops.reshape(add_107, full_int_array_66) + + # pd_op.softmax: (64x16x49x49xf32) <- (64x16x49x49xf32) + softmax_15 = paddle._C_ops.softmax(reshape_307, -1) + del reshape_307 + + # pd_op.matmul: (64x16x49x32xf32) <- (64x16x49x49xf32, 64x16x49x32xf32) + matmul_139 = paddle._C_ops.matmul(softmax_15, slice_15, False, False) + + # pd_op.transpose: (64x49x16x32xf32) <- (64x16x49x32xf32) + transpose_95 = paddle._C_ops.transpose(matmul_139, [0, 2, 1, 3]) + del matmul_139 + + # pd_op.reshape: (64x49x512xf32) <- (64x49x16x32xf32, 3xi64) + reshape_137 = paddle._C_ops.reshape(transpose_95, full_int_array_60) + + # pd_op.matmul: (64x49x512xf32) <- (64x49x512xf32, 512x512xf32) + matmul_79 = paddle._C_ops.matmul(reshape_137, parameter_110, False, False) + del parameter_110 + + # pd_op.add: (64x49x512xf32) <- (64x49x512xf32, 512xf32) + add_108 = paddle._C_ops.add(matmul_79, parameter_109) + del parameter_109 + + # pd_op.reshape: (64x7x7x512xf32) <- (64x49x512xf32, 4xi64) + reshape_138 = paddle._C_ops.reshape(add_108, full_int_array_57) + + # pd_op.reshape: (16x2x2x7x7x512xf32) <- (64x7x7x512xf32, 6xi64) + reshape_308 = paddle._C_ops.reshape(reshape_138, full_int_array_61) + + # pd_op.transpose: (16x2x7x2x7x512xf32) <- (16x2x2x7x7x512xf32) + transpose_96 = paddle._C_ops.transpose(reshape_308, [0, 1, 3, 2, 4, 5]) + del reshape_308 + + # pd_op.reshape: (16x14x14x512xf32) <- (16x2x7x2x7x512xf32, 4xi64) + reshape_139 = paddle._C_ops.reshape(transpose_96, full_int_array_62) + + # pd_op.roll: (16x14x14x512xf32) <- (16x14x14x512xf32, 2xi64) + roll_15 = paddle._C_ops.roll(reshape_139, full_int_array_5, [1, 2]) + + # pd_op.reshape: (16x196x512xf32) <- (16x14x14x512xf32, 3xi64) + reshape_140 = paddle._C_ops.reshape(roll_15, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_16 = paddle._C_ops.full( + [], + float("0.673913"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_173 = full_16 + + # pd_op.uniform: (16x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_28 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (16x1x1xf32) <- (xf32, 16x1x1xf32) + add_206 = paddle._C_ops.add(full_16, uniform_28) + del uniform_28 + + # pd_op.floor: (16x1x1xf32) <- (16x1x1xf32) + floor_28 = paddle._C_ops.floor(add_206) + del add_206 + + # pd_op.divide: (16x196x512xf32) <- (16x196x512xf32, xf32) + divide_28 = paddle._C_ops.divide(reshape_140, full_16) + + # pd_op.multiply: (16x196x512xf32) <- (16x196x512xf32, 16x1x1xf32) + multiply_28 = paddle._C_ops.multiply(divide_28, floor_28) + + # pd_op.add: (16x196x512xf32) <- (16x196x512xf32, 16x196x512xf32) + add_109 = paddle._C_ops.add(add_104, multiply_28) + + # pd_op.layer_norm: (16x196x512xf32, 16x196xf32, 16x196xf32) <- (16x196x512xf32, 512xf32, 512xf32) + layer_norm_102, layer_norm_103, layer_norm_104 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_109, parameter_108, parameter_107, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_107, parameter_108 + + # pd_op.matmul: (16x196x2048xf32) <- (16x196x512xf32, 512x2048xf32) + matmul_80 = paddle._C_ops.matmul(layer_norm_102, parameter_106, False, False) + del parameter_106 + + # pd_op.add: (16x196x2048xf32) <- (16x196x2048xf32, 2048xf32) + add_110 = paddle._C_ops.add(matmul_80, parameter_105) + del parameter_105 + + # pd_op.gelu: (16x196x2048xf32) <- (16x196x2048xf32) + gelu_15 = paddle._C_ops.gelu(add_110, False) + + # pd_op.matmul: (16x196x512xf32) <- (16x196x2048xf32, 2048x512xf32) + matmul_81 = paddle._C_ops.matmul(gelu_15, parameter_104, False, False) + del parameter_104 + + # pd_op.add: (16x196x512xf32) <- (16x196x512xf32, 512xf32) + add_111 = paddle._C_ops.add(matmul_81, parameter_103) + del parameter_103 + + # pd_op.uniform: (16x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_29 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (16x1x1xf32) <- (xf32, 16x1x1xf32) + add_207 = paddle._C_ops.add(full_16, uniform_29) + del uniform_29 + + # pd_op.floor: (16x1x1xf32) <- (16x1x1xf32) + floor_29 = paddle._C_ops.floor(add_207) + del add_207 + + # pd_op.divide: (16x196x512xf32) <- (16x196x512xf32, xf32) + divide_29 = paddle._C_ops.divide(add_111, full_16) + + # pd_op.multiply: (16x196x512xf32) <- (16x196x512xf32, 16x1x1xf32) + multiply_29 = paddle._C_ops.multiply(divide_29, floor_29) + + # pd_op.add: (16x196x512xf32) <- (16x196x512xf32, 16x196x512xf32) + add_112 = paddle._C_ops.add(add_109, multiply_29) + + # pd_op.layer_norm: (16x196x512xf32, 16x196xf32, 16x196xf32) <- (16x196x512xf32, 512xf32, 512xf32) + layer_norm_105, layer_norm_106, layer_norm_107 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_112, parameter_102, parameter_101, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_101, parameter_102 + + # pd_op.reshape: (16x14x14x512xf32) <- (16x196x512xf32, 4xi64) + reshape_141 = paddle._C_ops.reshape(layer_norm_105, full_int_array_55) + + # pd_op.reshape: (16x2x7x2x7x512xf32) <- (16x14x14x512xf32, 6xi64) + reshape_309 = paddle._C_ops.reshape(reshape_141, full_int_array_56) + + # pd_op.transpose: (16x2x2x7x7x512xf32) <- (16x2x7x2x7x512xf32) + transpose_97 = paddle._C_ops.transpose(reshape_309, [0, 1, 3, 2, 4, 5]) + del reshape_309 + + # pd_op.reshape: (64x7x7x512xf32) <- (16x2x2x7x7x512xf32, 4xi64) + reshape_142 = paddle._C_ops.reshape(transpose_97, full_int_array_57) + + # pd_op.reshape: (64x49x512xf32) <- (64x7x7x512xf32, 3xi64) + reshape_143 = paddle._C_ops.reshape(reshape_142, full_int_array_58) + + # pd_op.matmul: (64x49x1536xf32) <- (64x49x512xf32, 512x1536xf32) + matmul_82 = paddle._C_ops.matmul(reshape_143, parameter_100, False, False) + del parameter_100 + + # pd_op.add: (64x49x1536xf32) <- (64x49x1536xf32, 1536xf32) + add_113 = paddle._C_ops.add(matmul_82, parameter_99) + del parameter_99 + + # pd_op.reshape: (64x49x3x16x32xf32) <- (64x49x1536xf32, 5xi64) + reshape_310 = paddle._C_ops.reshape(add_113, full_int_array_59) + + # pd_op.transpose: (3x64x16x49x32xf32) <- (64x49x3x16x32xf32) + transpose_98 = paddle._C_ops.transpose(reshape_310, [2, 0, 3, 1, 4]) + del reshape_310 + + # pd_op.slice: (64x16x49x32xf32) <- (3x64x16x49x32xf32, 1xi64, 1xi64) + slice_56 = paddle._C_ops.slice( + transpose_98, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (64x16x49x32xf32) <- (3x64x16x49x32xf32, 1xi64, 1xi64) + slice_57 = paddle._C_ops.slice( + transpose_98, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (64x16x49x32xf32) <- (3x64x16x49x32xf32, 1xi64, 1xi64) + slice_16 = paddle._C_ops.slice( + transpose_98, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (64x16x49x32xf32) <- (64x16x49x32xf32, 1xf32) + scale_16 = paddle._C_ops.scale(slice_56, full_0, float("0"), True) + del slice_56 + + # pd_op.transpose: (64x16x32x49xf32) <- (64x16x49x32xf32) + transpose_99 = paddle._C_ops.transpose(slice_57, [0, 1, 3, 2]) + del slice_57 + + # pd_op.matmul: (64x16x49x49xf32) <- (64x16x49x32xf32, 64x16x32x49xf32) + matmul_83 = paddle._C_ops.matmul(scale_16, transpose_99, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_144 = paddle._C_ops.reshape(data_5, full_int_array_15) + del data_5 + + # pd_op.index_select: (2401x16xf32) <- (169x16xf32, 2401xi64) + index_select_16 = paddle._C_ops.index_select(data_6, reshape_144, 0) + del data_6 + + # pd_op.reshape: (49x49x16xf32) <- (2401x16xf32, 3xi64) + reshape_311 = paddle._C_ops.reshape(index_select_16, full_int_array_16) + + # pd_op.transpose: (16x49x49xf32) <- (49x49x16xf32) + transpose_100 = paddle._C_ops.transpose(reshape_311, [2, 0, 1]) + del reshape_311 + + # pd_op.unsqueeze: (1x16x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_24 = paddle._C_ops.unsqueeze(transpose_100, full_int_array_0) + + # pd_op.add: (64x16x49x49xf32) <- (64x16x49x49xf32, 1x16x49x49xf32) + add_208 = paddle._C_ops.add(matmul_83, unsqueeze_24) + + # pd_op.softmax: (64x16x49x49xf32) <- (64x16x49x49xf32) + softmax_16 = paddle._C_ops.softmax(add_208, -1) + del add_208 + + # pd_op.matmul: (64x16x49x32xf32) <- (64x16x49x49xf32, 64x16x49x32xf32) + matmul_140 = paddle._C_ops.matmul(softmax_16, slice_16, False, False) + + # pd_op.transpose: (64x49x16x32xf32) <- (64x16x49x32xf32) + transpose_101 = paddle._C_ops.transpose(matmul_140, [0, 2, 1, 3]) + del matmul_140 + + # pd_op.reshape: (64x49x512xf32) <- (64x49x16x32xf32, 3xi64) + reshape_145 = paddle._C_ops.reshape(transpose_101, full_int_array_60) + + # pd_op.matmul: (64x49x512xf32) <- (64x49x512xf32, 512x512xf32) + matmul_84 = paddle._C_ops.matmul(reshape_145, parameter_98, False, False) + del parameter_98 + + # pd_op.add: (64x49x512xf32) <- (64x49x512xf32, 512xf32) + add_114 = paddle._C_ops.add(matmul_84, parameter_97) + del parameter_97 + + # pd_op.reshape: (64x7x7x512xf32) <- (64x49x512xf32, 4xi64) + reshape_146 = paddle._C_ops.reshape(add_114, full_int_array_57) + + # pd_op.reshape: (16x2x2x7x7x512xf32) <- (64x7x7x512xf32, 6xi64) + reshape_312 = paddle._C_ops.reshape(reshape_146, full_int_array_61) + + # pd_op.transpose: (16x2x7x2x7x512xf32) <- (16x2x2x7x7x512xf32) + transpose_102 = paddle._C_ops.transpose(reshape_312, [0, 1, 3, 2, 4, 5]) + del reshape_312 + + # pd_op.reshape: (16x14x14x512xf32) <- (16x2x7x2x7x512xf32, 4xi64) + reshape_147 = paddle._C_ops.reshape(transpose_102, full_int_array_62) + + # pd_op.reshape: (16x196x512xf32) <- (16x14x14x512xf32, 3xi64) + reshape_148 = paddle._C_ops.reshape(reshape_147, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_17 = paddle._C_ops.full( + [], + float("0.652174"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_182 = full_17 + + # pd_op.uniform: (16x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_30 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (16x1x1xf32) <- (xf32, 16x1x1xf32) + add_209 = paddle._C_ops.add(full_17, uniform_30) + del uniform_30 + + # pd_op.floor: (16x1x1xf32) <- (16x1x1xf32) + floor_30 = paddle._C_ops.floor(add_209) + del add_209 + + # pd_op.divide: (16x196x512xf32) <- (16x196x512xf32, xf32) + divide_30 = paddle._C_ops.divide(reshape_148, full_17) + + # pd_op.multiply: (16x196x512xf32) <- (16x196x512xf32, 16x1x1xf32) + multiply_30 = paddle._C_ops.multiply(divide_30, floor_30) + + # pd_op.add: (16x196x512xf32) <- (16x196x512xf32, 16x196x512xf32) + add_115 = paddle._C_ops.add(add_112, multiply_30) + + # pd_op.layer_norm: (16x196x512xf32, 16x196xf32, 16x196xf32) <- (16x196x512xf32, 512xf32, 512xf32) + layer_norm_108, layer_norm_109, layer_norm_110 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_115, parameter_96, parameter_95, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_95, parameter_96 + + # pd_op.matmul: (16x196x2048xf32) <- (16x196x512xf32, 512x2048xf32) + matmul_85 = paddle._C_ops.matmul(layer_norm_108, parameter_94, False, False) + del parameter_94 + + # pd_op.add: (16x196x2048xf32) <- (16x196x2048xf32, 2048xf32) + add_116 = paddle._C_ops.add(matmul_85, parameter_93) + del parameter_93 + + # pd_op.gelu: (16x196x2048xf32) <- (16x196x2048xf32) + gelu_16 = paddle._C_ops.gelu(add_116, False) + + # pd_op.matmul: (16x196x512xf32) <- (16x196x2048xf32, 2048x512xf32) + matmul_86 = paddle._C_ops.matmul(gelu_16, parameter_92, False, False) + del parameter_92 + + # pd_op.add: (16x196x512xf32) <- (16x196x512xf32, 512xf32) + add_117 = paddle._C_ops.add(matmul_86, parameter_91) + del parameter_91 + + # pd_op.uniform: (16x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_31 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (16x1x1xf32) <- (xf32, 16x1x1xf32) + add_210 = paddle._C_ops.add(full_17, uniform_31) + del uniform_31 + + # pd_op.floor: (16x1x1xf32) <- (16x1x1xf32) + floor_31 = paddle._C_ops.floor(add_210) + del add_210 + + # pd_op.divide: (16x196x512xf32) <- (16x196x512xf32, xf32) + divide_31 = paddle._C_ops.divide(add_117, full_17) + + # pd_op.multiply: (16x196x512xf32) <- (16x196x512xf32, 16x1x1xf32) + multiply_31 = paddle._C_ops.multiply(divide_31, floor_31) + + # pd_op.add: (16x196x512xf32) <- (16x196x512xf32, 16x196x512xf32) + add_118 = paddle._C_ops.add(add_115, multiply_31) + + # pd_op.layer_norm: (16x196x512xf32, 16x196xf32, 16x196xf32) <- (16x196x512xf32, 512xf32, 512xf32) + layer_norm_111, layer_norm_112, layer_norm_113 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_118, parameter_90, parameter_89, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_89, parameter_90 + + # pd_op.reshape: (16x14x14x512xf32) <- (16x196x512xf32, 4xi64) + reshape_149 = paddle._C_ops.reshape(layer_norm_111, full_int_array_55) + + # pd_op.roll: (16x14x14x512xf32) <- (16x14x14x512xf32, 2xi64) + roll_16 = paddle._C_ops.roll(reshape_149, full_int_array_4, [1, 2]) + + # pd_op.reshape: (16x2x7x2x7x512xf32) <- (16x14x14x512xf32, 6xi64) + reshape_313 = paddle._C_ops.reshape(roll_16, full_int_array_56) + + # pd_op.transpose: (16x2x2x7x7x512xf32) <- (16x2x7x2x7x512xf32) + transpose_103 = paddle._C_ops.transpose(reshape_313, [0, 1, 3, 2, 4, 5]) + del reshape_313 + + # pd_op.reshape: (64x7x7x512xf32) <- (16x2x2x7x7x512xf32, 4xi64) + reshape_150 = paddle._C_ops.reshape(transpose_103, full_int_array_57) + + # pd_op.reshape: (64x49x512xf32) <- (64x7x7x512xf32, 3xi64) + reshape_151 = paddle._C_ops.reshape(reshape_150, full_int_array_58) + + # pd_op.full: (1x14x14x1xf32) <- () + full_42 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__76 = paddle._C_ops.set_value_( + full_42, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_42 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__77 = paddle._C_ops.set_value_( + set_value__76, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__76 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__78 = paddle._C_ops.set_value_( + set_value__77, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__77 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__79 = paddle._C_ops.set_value_( + set_value__78, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__78 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__80 = paddle._C_ops.set_value_( + set_value__79, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__79 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__81 = paddle._C_ops.set_value_( + set_value__80, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__80 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__82 = paddle._C_ops.set_value_( + set_value__81, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__81 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__83 = paddle._C_ops.set_value_( + set_value__82, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__82 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__8 = paddle._C_ops.set_value_( + set_value__83, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__83 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_314 = paddle._C_ops.reshape(set_value__8, full_int_array_64) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_154 = paddle._C_ops.transpose(reshape_314, [0, 1, 3, 2, 4, 5]) + del reshape_314 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_315 = paddle._C_ops.reshape(transpose_154, full_int_array_36) + del transpose_154 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_316 = paddle._C_ops.reshape(reshape_315, full_int_array_37) + del reshape_315 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_61 = paddle._C_ops.unsqueeze(reshape_316, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_62 = paddle._C_ops.unsqueeze(reshape_316, full_int_array_2) + del reshape_316 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_8 = paddle._C_ops.subtract(unsqueeze_61, unsqueeze_62) + del unsqueeze_61, unsqueeze_62 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_8 = paddle._C_ops.not_equal(subtract_8, full_26) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_16 = paddle._C_ops.where(not_equal_8, full_35, subtract_8) + del not_equal_8, subtract_8 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_8 = paddle._C_ops.equal(where_16, full_26) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_17 = paddle._C_ops.where(equal_8, full_36, where_16) + del equal_8, where_16 + + # pd_op.matmul: (64x49x1536xf32) <- (64x49x512xf32, 512x1536xf32) + matmul_87 = paddle._C_ops.matmul(reshape_151, parameter_88, False, False) + del parameter_88 + + # pd_op.add: (64x49x1536xf32) <- (64x49x1536xf32, 1536xf32) + add_119 = paddle._C_ops.add(matmul_87, parameter_87) + del parameter_87 + + # pd_op.reshape: (64x49x3x16x32xf32) <- (64x49x1536xf32, 5xi64) + reshape_317 = paddle._C_ops.reshape(add_119, full_int_array_59) + + # pd_op.transpose: (3x64x16x49x32xf32) <- (64x49x3x16x32xf32) + transpose_104 = paddle._C_ops.transpose(reshape_317, [2, 0, 3, 1, 4]) + del reshape_317 + + # pd_op.slice: (64x16x49x32xf32) <- (3x64x16x49x32xf32, 1xi64, 1xi64) + slice_58 = paddle._C_ops.slice( + transpose_104, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (64x16x49x32xf32) <- (3x64x16x49x32xf32, 1xi64, 1xi64) + slice_59 = paddle._C_ops.slice( + transpose_104, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (64x16x49x32xf32) <- (3x64x16x49x32xf32, 1xi64, 1xi64) + slice_17 = paddle._C_ops.slice( + transpose_104, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (64x16x49x32xf32) <- (64x16x49x32xf32, 1xf32) + scale_17 = paddle._C_ops.scale(slice_58, full_0, float("0"), True) + del slice_58 + + # pd_op.transpose: (64x16x32x49xf32) <- (64x16x49x32xf32) + transpose_105 = paddle._C_ops.transpose(slice_59, [0, 1, 3, 2]) + del slice_59 + + # pd_op.matmul: (64x16x49x49xf32) <- (64x16x49x32xf32, 64x16x32x49xf32) + matmul_88 = paddle._C_ops.matmul(scale_17, transpose_105, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_152 = paddle._C_ops.reshape(data_7, full_int_array_15) + del data_7 + + # pd_op.index_select: (2401x16xf32) <- (169x16xf32, 2401xi64) + index_select_17 = paddle._C_ops.index_select(data_8, reshape_152, 0) + del data_8 + + # pd_op.reshape: (49x49x16xf32) <- (2401x16xf32, 3xi64) + reshape_318 = paddle._C_ops.reshape(index_select_17, full_int_array_16) + + # pd_op.transpose: (16x49x49xf32) <- (49x49x16xf32) + transpose_106 = paddle._C_ops.transpose(reshape_318, [2, 0, 1]) + del reshape_318 + + # pd_op.unsqueeze: (1x16x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_25 = paddle._C_ops.unsqueeze(transpose_106, full_int_array_0) + + # pd_op.add: (64x16x49x49xf32) <- (64x16x49x49xf32, 1x16x49x49xf32) + add_120 = paddle._C_ops.add(matmul_88, unsqueeze_25) + + # pd_op.reshape: (16x4x16x49x49xf32) <- (64x16x49x49xf32, 5xi64) + reshape_153 = paddle._C_ops.reshape(add_120, full_int_array_65) + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_63 = paddle._C_ops.unsqueeze(where_17, full_int_array_1) + del where_17 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_26 = paddle._C_ops.unsqueeze(unsqueeze_63, full_int_array_0) + del unsqueeze_63 + + # pd_op.add: (16x4x16x49x49xf32) <- (16x4x16x49x49xf32, 1x4x1x49x49xf32) + add_121 = paddle._C_ops.add(reshape_153, unsqueeze_26) + + # pd_op.reshape: (64x16x49x49xf32) <- (16x4x16x49x49xf32, 4xi64) + reshape_319 = paddle._C_ops.reshape(add_121, full_int_array_66) + + # pd_op.softmax: (64x16x49x49xf32) <- (64x16x49x49xf32) + softmax_17 = paddle._C_ops.softmax(reshape_319, -1) + del reshape_319 + + # pd_op.matmul: (64x16x49x32xf32) <- (64x16x49x49xf32, 64x16x49x32xf32) + matmul_141 = paddle._C_ops.matmul(softmax_17, slice_17, False, False) + + # pd_op.transpose: (64x49x16x32xf32) <- (64x16x49x32xf32) + transpose_107 = paddle._C_ops.transpose(matmul_141, [0, 2, 1, 3]) + del matmul_141 + + # pd_op.reshape: (64x49x512xf32) <- (64x49x16x32xf32, 3xi64) + reshape_154 = paddle._C_ops.reshape(transpose_107, full_int_array_60) + + # pd_op.matmul: (64x49x512xf32) <- (64x49x512xf32, 512x512xf32) + matmul_89 = paddle._C_ops.matmul(reshape_154, parameter_86, False, False) + del parameter_86 + + # pd_op.add: (64x49x512xf32) <- (64x49x512xf32, 512xf32) + add_122 = paddle._C_ops.add(matmul_89, parameter_85) + del parameter_85 + + # pd_op.reshape: (64x7x7x512xf32) <- (64x49x512xf32, 4xi64) + reshape_155 = paddle._C_ops.reshape(add_122, full_int_array_57) + + # pd_op.reshape: (16x2x2x7x7x512xf32) <- (64x7x7x512xf32, 6xi64) + reshape_320 = paddle._C_ops.reshape(reshape_155, full_int_array_61) + + # pd_op.transpose: (16x2x7x2x7x512xf32) <- (16x2x2x7x7x512xf32) + transpose_108 = paddle._C_ops.transpose(reshape_320, [0, 1, 3, 2, 4, 5]) + del reshape_320 + + # pd_op.reshape: (16x14x14x512xf32) <- (16x2x7x2x7x512xf32, 4xi64) + reshape_156 = paddle._C_ops.reshape(transpose_108, full_int_array_62) + + # pd_op.roll: (16x14x14x512xf32) <- (16x14x14x512xf32, 2xi64) + roll_17 = paddle._C_ops.roll(reshape_156, full_int_array_5, [1, 2]) + + # pd_op.reshape: (16x196x512xf32) <- (16x14x14x512xf32, 3xi64) + reshape_157 = paddle._C_ops.reshape(roll_17, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_18 = paddle._C_ops.full( + [], + float("0.630435"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_193 = full_18 + + # pd_op.uniform: (16x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_32 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (16x1x1xf32) <- (xf32, 16x1x1xf32) + add_211 = paddle._C_ops.add(full_18, uniform_32) + del uniform_32 + + # pd_op.floor: (16x1x1xf32) <- (16x1x1xf32) + floor_32 = paddle._C_ops.floor(add_211) + del add_211 + + # pd_op.divide: (16x196x512xf32) <- (16x196x512xf32, xf32) + divide_32 = paddle._C_ops.divide(reshape_157, full_18) + + # pd_op.multiply: (16x196x512xf32) <- (16x196x512xf32, 16x1x1xf32) + multiply_32 = paddle._C_ops.multiply(divide_32, floor_32) + + # pd_op.add: (16x196x512xf32) <- (16x196x512xf32, 16x196x512xf32) + add_123 = paddle._C_ops.add(add_118, multiply_32) + + # pd_op.layer_norm: (16x196x512xf32, 16x196xf32, 16x196xf32) <- (16x196x512xf32, 512xf32, 512xf32) + layer_norm_114, layer_norm_115, layer_norm_116 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_123, parameter_84, parameter_83, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_83, parameter_84 + + # pd_op.matmul: (16x196x2048xf32) <- (16x196x512xf32, 512x2048xf32) + matmul_90 = paddle._C_ops.matmul(layer_norm_114, parameter_82, False, False) + del parameter_82 + + # pd_op.add: (16x196x2048xf32) <- (16x196x2048xf32, 2048xf32) + add_124 = paddle._C_ops.add(matmul_90, parameter_81) + del parameter_81 + + # pd_op.gelu: (16x196x2048xf32) <- (16x196x2048xf32) + gelu_17 = paddle._C_ops.gelu(add_124, False) + + # pd_op.matmul: (16x196x512xf32) <- (16x196x2048xf32, 2048x512xf32) + matmul_91 = paddle._C_ops.matmul(gelu_17, parameter_80, False, False) + del parameter_80 + + # pd_op.add: (16x196x512xf32) <- (16x196x512xf32, 512xf32) + add_125 = paddle._C_ops.add(matmul_91, parameter_79) + del parameter_79 + + # pd_op.uniform: (16x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_33 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (16x1x1xf32) <- (xf32, 16x1x1xf32) + add_212 = paddle._C_ops.add(full_18, uniform_33) + del uniform_33 + + # pd_op.floor: (16x1x1xf32) <- (16x1x1xf32) + floor_33 = paddle._C_ops.floor(add_212) + del add_212 + + # pd_op.divide: (16x196x512xf32) <- (16x196x512xf32, xf32) + divide_33 = paddle._C_ops.divide(add_125, full_18) + + # pd_op.multiply: (16x196x512xf32) <- (16x196x512xf32, 16x1x1xf32) + multiply_33 = paddle._C_ops.multiply(divide_33, floor_33) + + # pd_op.add: (16x196x512xf32) <- (16x196x512xf32, 16x196x512xf32) + add_126 = paddle._C_ops.add(add_123, multiply_33) + + # pd_op.layer_norm: (16x196x512xf32, 16x196xf32, 16x196xf32) <- (16x196x512xf32, 512xf32, 512xf32) + layer_norm_117, layer_norm_118, layer_norm_119 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_126, parameter_78, parameter_77, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_77, parameter_78 + + # pd_op.reshape: (16x14x14x512xf32) <- (16x196x512xf32, 4xi64) + reshape_158 = paddle._C_ops.reshape(layer_norm_117, full_int_array_55) + + # pd_op.reshape: (16x2x7x2x7x512xf32) <- (16x14x14x512xf32, 6xi64) + reshape_321 = paddle._C_ops.reshape(reshape_158, full_int_array_56) + + # pd_op.transpose: (16x2x2x7x7x512xf32) <- (16x2x7x2x7x512xf32) + transpose_109 = paddle._C_ops.transpose(reshape_321, [0, 1, 3, 2, 4, 5]) + del reshape_321 + + # pd_op.reshape: (64x7x7x512xf32) <- (16x2x2x7x7x512xf32, 4xi64) + reshape_159 = paddle._C_ops.reshape(transpose_109, full_int_array_57) + + # pd_op.reshape: (64x49x512xf32) <- (64x7x7x512xf32, 3xi64) + reshape_160 = paddle._C_ops.reshape(reshape_159, full_int_array_58) + + # pd_op.matmul: (64x49x1536xf32) <- (64x49x512xf32, 512x1536xf32) + matmul_92 = paddle._C_ops.matmul(reshape_160, parameter_76, False, False) + del parameter_76 + + # pd_op.add: (64x49x1536xf32) <- (64x49x1536xf32, 1536xf32) + add_127 = paddle._C_ops.add(matmul_92, parameter_75) + del parameter_75 + + # pd_op.reshape: (64x49x3x16x32xf32) <- (64x49x1536xf32, 5xi64) + reshape_322 = paddle._C_ops.reshape(add_127, full_int_array_59) + + # pd_op.transpose: (3x64x16x49x32xf32) <- (64x49x3x16x32xf32) + transpose_110 = paddle._C_ops.transpose(reshape_322, [2, 0, 3, 1, 4]) + del reshape_322 + + # pd_op.slice: (64x16x49x32xf32) <- (3x64x16x49x32xf32, 1xi64, 1xi64) + slice_60 = paddle._C_ops.slice( + transpose_110, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (64x16x49x32xf32) <- (3x64x16x49x32xf32, 1xi64, 1xi64) + slice_61 = paddle._C_ops.slice( + transpose_110, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (64x16x49x32xf32) <- (3x64x16x49x32xf32, 1xi64, 1xi64) + slice_18 = paddle._C_ops.slice( + transpose_110, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (64x16x49x32xf32) <- (64x16x49x32xf32, 1xf32) + scale_18 = paddle._C_ops.scale(slice_60, full_0, float("0"), True) + del slice_60 + + # pd_op.transpose: (64x16x32x49xf32) <- (64x16x49x32xf32) + transpose_111 = paddle._C_ops.transpose(slice_61, [0, 1, 3, 2]) + del slice_61 + + # pd_op.matmul: (64x16x49x49xf32) <- (64x16x49x32xf32, 64x16x32x49xf32) + matmul_93 = paddle._C_ops.matmul(scale_18, transpose_111, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_161 = paddle._C_ops.reshape(data_9, full_int_array_15) + del data_9 + + # pd_op.index_select: (2401x16xf32) <- (169x16xf32, 2401xi64) + index_select_18 = paddle._C_ops.index_select(data_10, reshape_161, 0) + del data_10 + + # pd_op.reshape: (49x49x16xf32) <- (2401x16xf32, 3xi64) + reshape_323 = paddle._C_ops.reshape(index_select_18, full_int_array_16) + + # pd_op.transpose: (16x49x49xf32) <- (49x49x16xf32) + transpose_112 = paddle._C_ops.transpose(reshape_323, [2, 0, 1]) + del reshape_323 + + # pd_op.unsqueeze: (1x16x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_27 = paddle._C_ops.unsqueeze(transpose_112, full_int_array_0) + + # pd_op.add: (64x16x49x49xf32) <- (64x16x49x49xf32, 1x16x49x49xf32) + add_213 = paddle._C_ops.add(matmul_93, unsqueeze_27) + + # pd_op.softmax: (64x16x49x49xf32) <- (64x16x49x49xf32) + softmax_18 = paddle._C_ops.softmax(add_213, -1) + del add_213 + + # pd_op.matmul: (64x16x49x32xf32) <- (64x16x49x49xf32, 64x16x49x32xf32) + matmul_142 = paddle._C_ops.matmul(softmax_18, slice_18, False, False) + + # pd_op.transpose: (64x49x16x32xf32) <- (64x16x49x32xf32) + transpose_113 = paddle._C_ops.transpose(matmul_142, [0, 2, 1, 3]) + del matmul_142 + + # pd_op.reshape: (64x49x512xf32) <- (64x49x16x32xf32, 3xi64) + reshape_162 = paddle._C_ops.reshape(transpose_113, full_int_array_60) + + # pd_op.matmul: (64x49x512xf32) <- (64x49x512xf32, 512x512xf32) + matmul_94 = paddle._C_ops.matmul(reshape_162, parameter_74, False, False) + del parameter_74 + + # pd_op.add: (64x49x512xf32) <- (64x49x512xf32, 512xf32) + add_128 = paddle._C_ops.add(matmul_94, parameter_73) + del parameter_73 + + # pd_op.reshape: (64x7x7x512xf32) <- (64x49x512xf32, 4xi64) + reshape_163 = paddle._C_ops.reshape(add_128, full_int_array_57) + + # pd_op.reshape: (16x2x2x7x7x512xf32) <- (64x7x7x512xf32, 6xi64) + reshape_324 = paddle._C_ops.reshape(reshape_163, full_int_array_61) + + # pd_op.transpose: (16x2x7x2x7x512xf32) <- (16x2x2x7x7x512xf32) + transpose_114 = paddle._C_ops.transpose(reshape_324, [0, 1, 3, 2, 4, 5]) + del reshape_324 + + # pd_op.reshape: (16x14x14x512xf32) <- (16x2x7x2x7x512xf32, 4xi64) + reshape_164 = paddle._C_ops.reshape(transpose_114, full_int_array_62) + + # pd_op.reshape: (16x196x512xf32) <- (16x14x14x512xf32, 3xi64) + reshape_165 = paddle._C_ops.reshape(reshape_164, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_19 = paddle._C_ops.full( + [], + float("0.608696"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_202 = full_19 + + # pd_op.uniform: (16x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_34 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (16x1x1xf32) <- (xf32, 16x1x1xf32) + add_214 = paddle._C_ops.add(full_19, uniform_34) + del uniform_34 + + # pd_op.floor: (16x1x1xf32) <- (16x1x1xf32) + floor_34 = paddle._C_ops.floor(add_214) + del add_214 + + # pd_op.divide: (16x196x512xf32) <- (16x196x512xf32, xf32) + divide_34 = paddle._C_ops.divide(reshape_165, full_19) + + # pd_op.multiply: (16x196x512xf32) <- (16x196x512xf32, 16x1x1xf32) + multiply_34 = paddle._C_ops.multiply(divide_34, floor_34) + + # pd_op.add: (16x196x512xf32) <- (16x196x512xf32, 16x196x512xf32) + add_129 = paddle._C_ops.add(add_126, multiply_34) + + # pd_op.layer_norm: (16x196x512xf32, 16x196xf32, 16x196xf32) <- (16x196x512xf32, 512xf32, 512xf32) + layer_norm_120, layer_norm_121, layer_norm_122 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_129, parameter_72, parameter_71, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_71, parameter_72 + + # pd_op.matmul: (16x196x2048xf32) <- (16x196x512xf32, 512x2048xf32) + matmul_95 = paddle._C_ops.matmul(layer_norm_120, parameter_70, False, False) + del parameter_70 + + # pd_op.add: (16x196x2048xf32) <- (16x196x2048xf32, 2048xf32) + add_130 = paddle._C_ops.add(matmul_95, parameter_69) + del parameter_69 + + # pd_op.gelu: (16x196x2048xf32) <- (16x196x2048xf32) + gelu_18 = paddle._C_ops.gelu(add_130, False) + + # pd_op.matmul: (16x196x512xf32) <- (16x196x2048xf32, 2048x512xf32) + matmul_96 = paddle._C_ops.matmul(gelu_18, parameter_68, False, False) + del parameter_68 + + # pd_op.add: (16x196x512xf32) <- (16x196x512xf32, 512xf32) + add_131 = paddle._C_ops.add(matmul_96, parameter_67) + del parameter_67 + + # pd_op.uniform: (16x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_35 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (16x1x1xf32) <- (xf32, 16x1x1xf32) + add_215 = paddle._C_ops.add(full_19, uniform_35) + del uniform_35 + + # pd_op.floor: (16x1x1xf32) <- (16x1x1xf32) + floor_35 = paddle._C_ops.floor(add_215) + del add_215 + + # pd_op.divide: (16x196x512xf32) <- (16x196x512xf32, xf32) + divide_35 = paddle._C_ops.divide(add_131, full_19) + + # pd_op.multiply: (16x196x512xf32) <- (16x196x512xf32, 16x1x1xf32) + multiply_35 = paddle._C_ops.multiply(divide_35, floor_35) + + # pd_op.add: (16x196x512xf32) <- (16x196x512xf32, 16x196x512xf32) + add_132 = paddle._C_ops.add(add_129, multiply_35) + + # pd_op.layer_norm: (16x196x512xf32, 16x196xf32, 16x196xf32) <- (16x196x512xf32, 512xf32, 512xf32) + layer_norm_123, layer_norm_124, layer_norm_125 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_132, parameter_66, parameter_65, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_65, parameter_66 + + # pd_op.reshape: (16x14x14x512xf32) <- (16x196x512xf32, 4xi64) + reshape_166 = paddle._C_ops.reshape(layer_norm_123, full_int_array_55) + + # pd_op.roll: (16x14x14x512xf32) <- (16x14x14x512xf32, 2xi64) + roll_18 = paddle._C_ops.roll(reshape_166, full_int_array_4, [1, 2]) + + # pd_op.reshape: (16x2x7x2x7x512xf32) <- (16x14x14x512xf32, 6xi64) + reshape_325 = paddle._C_ops.reshape(roll_18, full_int_array_56) + + # pd_op.transpose: (16x2x2x7x7x512xf32) <- (16x2x7x2x7x512xf32) + transpose_115 = paddle._C_ops.transpose(reshape_325, [0, 1, 3, 2, 4, 5]) + del reshape_325 + + # pd_op.reshape: (64x7x7x512xf32) <- (16x2x2x7x7x512xf32, 4xi64) + reshape_167 = paddle._C_ops.reshape(transpose_115, full_int_array_57) + + # pd_op.reshape: (64x49x512xf32) <- (64x7x7x512xf32, 3xi64) + reshape_168 = paddle._C_ops.reshape(reshape_167, full_int_array_58) + + # pd_op.full: (1x14x14x1xf32) <- () + full_43 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__84 = paddle._C_ops.set_value_( + full_43, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_43 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__85 = paddle._C_ops.set_value_( + set_value__84, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__84 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__86 = paddle._C_ops.set_value_( + set_value__85, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__85 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__87 = paddle._C_ops.set_value_( + set_value__86, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__86 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__88 = paddle._C_ops.set_value_( + set_value__87, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__87 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__89 = paddle._C_ops.set_value_( + set_value__88, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__88 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__90 = paddle._C_ops.set_value_( + set_value__89, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__89 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__91 = paddle._C_ops.set_value_( + set_value__90, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__90 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__9 = paddle._C_ops.set_value_( + set_value__91, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__91 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_326 = paddle._C_ops.reshape(set_value__9, full_int_array_64) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_155 = paddle._C_ops.transpose(reshape_326, [0, 1, 3, 2, 4, 5]) + del reshape_326 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_327 = paddle._C_ops.reshape(transpose_155, full_int_array_36) + del transpose_155 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_328 = paddle._C_ops.reshape(reshape_327, full_int_array_37) + del reshape_327 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_64 = paddle._C_ops.unsqueeze(reshape_328, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_65 = paddle._C_ops.unsqueeze(reshape_328, full_int_array_2) + del reshape_328 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_9 = paddle._C_ops.subtract(unsqueeze_64, unsqueeze_65) + del unsqueeze_64, unsqueeze_65 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_9 = paddle._C_ops.not_equal(subtract_9, full_26) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_18 = paddle._C_ops.where(not_equal_9, full_35, subtract_9) + del not_equal_9, subtract_9 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_9 = paddle._C_ops.equal(where_18, full_26) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_19 = paddle._C_ops.where(equal_9, full_36, where_18) + del equal_9, where_18 + + # pd_op.matmul: (64x49x1536xf32) <- (64x49x512xf32, 512x1536xf32) + matmul_97 = paddle._C_ops.matmul(reshape_168, parameter_64, False, False) + del parameter_64 + + # pd_op.add: (64x49x1536xf32) <- (64x49x1536xf32, 1536xf32) + add_133 = paddle._C_ops.add(matmul_97, parameter_63) + del parameter_63 + + # pd_op.reshape: (64x49x3x16x32xf32) <- (64x49x1536xf32, 5xi64) + reshape_329 = paddle._C_ops.reshape(add_133, full_int_array_59) + + # pd_op.transpose: (3x64x16x49x32xf32) <- (64x49x3x16x32xf32) + transpose_116 = paddle._C_ops.transpose(reshape_329, [2, 0, 3, 1, 4]) + del reshape_329 + + # pd_op.slice: (64x16x49x32xf32) <- (3x64x16x49x32xf32, 1xi64, 1xi64) + slice_62 = paddle._C_ops.slice( + transpose_116, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (64x16x49x32xf32) <- (3x64x16x49x32xf32, 1xi64, 1xi64) + slice_63 = paddle._C_ops.slice( + transpose_116, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (64x16x49x32xf32) <- (3x64x16x49x32xf32, 1xi64, 1xi64) + slice_19 = paddle._C_ops.slice( + transpose_116, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (64x16x49x32xf32) <- (64x16x49x32xf32, 1xf32) + scale_19 = paddle._C_ops.scale(slice_62, full_0, float("0"), True) + del slice_62 + + # pd_op.transpose: (64x16x32x49xf32) <- (64x16x49x32xf32) + transpose_117 = paddle._C_ops.transpose(slice_63, [0, 1, 3, 2]) + del slice_63 + + # pd_op.matmul: (64x16x49x49xf32) <- (64x16x49x32xf32, 64x16x32x49xf32) + matmul_98 = paddle._C_ops.matmul(scale_19, transpose_117, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_169 = paddle._C_ops.reshape(data_11, full_int_array_15) + del data_11 + + # pd_op.index_select: (2401x16xf32) <- (169x16xf32, 2401xi64) + index_select_19 = paddle._C_ops.index_select(data_12, reshape_169, 0) + del data_12 + + # pd_op.reshape: (49x49x16xf32) <- (2401x16xf32, 3xi64) + reshape_330 = paddle._C_ops.reshape(index_select_19, full_int_array_16) + + # pd_op.transpose: (16x49x49xf32) <- (49x49x16xf32) + transpose_118 = paddle._C_ops.transpose(reshape_330, [2, 0, 1]) + del reshape_330 + + # pd_op.unsqueeze: (1x16x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_28 = paddle._C_ops.unsqueeze(transpose_118, full_int_array_0) + + # pd_op.add: (64x16x49x49xf32) <- (64x16x49x49xf32, 1x16x49x49xf32) + add_134 = paddle._C_ops.add(matmul_98, unsqueeze_28) + + # pd_op.reshape: (16x4x16x49x49xf32) <- (64x16x49x49xf32, 5xi64) + reshape_170 = paddle._C_ops.reshape(add_134, full_int_array_65) + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_66 = paddle._C_ops.unsqueeze(where_19, full_int_array_1) + del where_19 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_29 = paddle._C_ops.unsqueeze(unsqueeze_66, full_int_array_0) + del unsqueeze_66 + + # pd_op.add: (16x4x16x49x49xf32) <- (16x4x16x49x49xf32, 1x4x1x49x49xf32) + add_135 = paddle._C_ops.add(reshape_170, unsqueeze_29) + + # pd_op.reshape: (64x16x49x49xf32) <- (16x4x16x49x49xf32, 4xi64) + reshape_331 = paddle._C_ops.reshape(add_135, full_int_array_66) + + # pd_op.softmax: (64x16x49x49xf32) <- (64x16x49x49xf32) + softmax_19 = paddle._C_ops.softmax(reshape_331, -1) + del reshape_331 + + # pd_op.matmul: (64x16x49x32xf32) <- (64x16x49x49xf32, 64x16x49x32xf32) + matmul_143 = paddle._C_ops.matmul(softmax_19, slice_19, False, False) + + # pd_op.transpose: (64x49x16x32xf32) <- (64x16x49x32xf32) + transpose_119 = paddle._C_ops.transpose(matmul_143, [0, 2, 1, 3]) + del matmul_143 + + # pd_op.reshape: (64x49x512xf32) <- (64x49x16x32xf32, 3xi64) + reshape_171 = paddle._C_ops.reshape(transpose_119, full_int_array_60) + + # pd_op.matmul: (64x49x512xf32) <- (64x49x512xf32, 512x512xf32) + matmul_99 = paddle._C_ops.matmul(reshape_171, parameter_62, False, False) + del parameter_62 + + # pd_op.add: (64x49x512xf32) <- (64x49x512xf32, 512xf32) + add_136 = paddle._C_ops.add(matmul_99, parameter_61) + del parameter_61 + + # pd_op.reshape: (64x7x7x512xf32) <- (64x49x512xf32, 4xi64) + reshape_172 = paddle._C_ops.reshape(add_136, full_int_array_57) + + # pd_op.reshape: (16x2x2x7x7x512xf32) <- (64x7x7x512xf32, 6xi64) + reshape_332 = paddle._C_ops.reshape(reshape_172, full_int_array_61) + + # pd_op.transpose: (16x2x7x2x7x512xf32) <- (16x2x2x7x7x512xf32) + transpose_120 = paddle._C_ops.transpose(reshape_332, [0, 1, 3, 2, 4, 5]) + del reshape_332 + + # pd_op.reshape: (16x14x14x512xf32) <- (16x2x7x2x7x512xf32, 4xi64) + reshape_173 = paddle._C_ops.reshape(transpose_120, full_int_array_62) + + # pd_op.roll: (16x14x14x512xf32) <- (16x14x14x512xf32, 2xi64) + roll_19 = paddle._C_ops.roll(reshape_173, full_int_array_5, [1, 2]) + + # pd_op.reshape: (16x196x512xf32) <- (16x14x14x512xf32, 3xi64) + reshape_174 = paddle._C_ops.reshape(roll_19, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_20 = paddle._C_ops.full( + [], + float("0.586957"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_213 = full_20 + + # pd_op.uniform: (16x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_36 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (16x1x1xf32) <- (xf32, 16x1x1xf32) + add_216 = paddle._C_ops.add(full_20, uniform_36) + del uniform_36 + + # pd_op.floor: (16x1x1xf32) <- (16x1x1xf32) + floor_36 = paddle._C_ops.floor(add_216) + del add_216 + + # pd_op.divide: (16x196x512xf32) <- (16x196x512xf32, xf32) + divide_36 = paddle._C_ops.divide(reshape_174, full_20) + + # pd_op.multiply: (16x196x512xf32) <- (16x196x512xf32, 16x1x1xf32) + multiply_36 = paddle._C_ops.multiply(divide_36, floor_36) + + # pd_op.add: (16x196x512xf32) <- (16x196x512xf32, 16x196x512xf32) + add_137 = paddle._C_ops.add(add_132, multiply_36) + + # pd_op.layer_norm: (16x196x512xf32, 16x196xf32, 16x196xf32) <- (16x196x512xf32, 512xf32, 512xf32) + layer_norm_126, layer_norm_127, layer_norm_128 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_137, parameter_60, parameter_59, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_59, parameter_60 + + # pd_op.matmul: (16x196x2048xf32) <- (16x196x512xf32, 512x2048xf32) + matmul_100 = paddle._C_ops.matmul(layer_norm_126, parameter_58, False, False) + del parameter_58 + + # pd_op.add: (16x196x2048xf32) <- (16x196x2048xf32, 2048xf32) + add_138 = paddle._C_ops.add(matmul_100, parameter_57) + del parameter_57 + + # pd_op.gelu: (16x196x2048xf32) <- (16x196x2048xf32) + gelu_19 = paddle._C_ops.gelu(add_138, False) + + # pd_op.matmul: (16x196x512xf32) <- (16x196x2048xf32, 2048x512xf32) + matmul_101 = paddle._C_ops.matmul(gelu_19, parameter_56, False, False) + del parameter_56 + + # pd_op.add: (16x196x512xf32) <- (16x196x512xf32, 512xf32) + add_139 = paddle._C_ops.add(matmul_101, parameter_55) + del parameter_55 + + # pd_op.uniform: (16x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_37 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (16x1x1xf32) <- (xf32, 16x1x1xf32) + add_217 = paddle._C_ops.add(full_20, uniform_37) + del uniform_37 + + # pd_op.floor: (16x1x1xf32) <- (16x1x1xf32) + floor_37 = paddle._C_ops.floor(add_217) + del add_217 + + # pd_op.divide: (16x196x512xf32) <- (16x196x512xf32, xf32) + divide_37 = paddle._C_ops.divide(add_139, full_20) + + # pd_op.multiply: (16x196x512xf32) <- (16x196x512xf32, 16x1x1xf32) + multiply_37 = paddle._C_ops.multiply(divide_37, floor_37) + + # pd_op.add: (16x196x512xf32) <- (16x196x512xf32, 16x196x512xf32) + add_140 = paddle._C_ops.add(add_137, multiply_37) + + # pd_op.layer_norm: (16x196x512xf32, 16x196xf32, 16x196xf32) <- (16x196x512xf32, 512xf32, 512xf32) + layer_norm_129, layer_norm_130, layer_norm_131 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_140, parameter_54, parameter_53, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_53, parameter_54 + + # pd_op.reshape: (16x14x14x512xf32) <- (16x196x512xf32, 4xi64) + reshape_175 = paddle._C_ops.reshape(layer_norm_129, full_int_array_55) + + # pd_op.reshape: (16x2x7x2x7x512xf32) <- (16x14x14x512xf32, 6xi64) + reshape_333 = paddle._C_ops.reshape(reshape_175, full_int_array_56) + + # pd_op.transpose: (16x2x2x7x7x512xf32) <- (16x2x7x2x7x512xf32) + transpose_121 = paddle._C_ops.transpose(reshape_333, [0, 1, 3, 2, 4, 5]) + del reshape_333 + + # pd_op.reshape: (64x7x7x512xf32) <- (16x2x2x7x7x512xf32, 4xi64) + reshape_176 = paddle._C_ops.reshape(transpose_121, full_int_array_57) + + # pd_op.reshape: (64x49x512xf32) <- (64x7x7x512xf32, 3xi64) + reshape_177 = paddle._C_ops.reshape(reshape_176, full_int_array_58) + + # pd_op.matmul: (64x49x1536xf32) <- (64x49x512xf32, 512x1536xf32) + matmul_102 = paddle._C_ops.matmul(reshape_177, parameter_52, False, False) + del parameter_52 + + # pd_op.add: (64x49x1536xf32) <- (64x49x1536xf32, 1536xf32) + add_141 = paddle._C_ops.add(matmul_102, parameter_51) + del parameter_51 + + # pd_op.reshape: (64x49x3x16x32xf32) <- (64x49x1536xf32, 5xi64) + reshape_334 = paddle._C_ops.reshape(add_141, full_int_array_59) + + # pd_op.transpose: (3x64x16x49x32xf32) <- (64x49x3x16x32xf32) + transpose_122 = paddle._C_ops.transpose(reshape_334, [2, 0, 3, 1, 4]) + del reshape_334 + + # pd_op.slice: (64x16x49x32xf32) <- (3x64x16x49x32xf32, 1xi64, 1xi64) + slice_64 = paddle._C_ops.slice( + transpose_122, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (64x16x49x32xf32) <- (3x64x16x49x32xf32, 1xi64, 1xi64) + slice_65 = paddle._C_ops.slice( + transpose_122, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (64x16x49x32xf32) <- (3x64x16x49x32xf32, 1xi64, 1xi64) + slice_20 = paddle._C_ops.slice( + transpose_122, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (64x16x49x32xf32) <- (64x16x49x32xf32, 1xf32) + scale_20 = paddle._C_ops.scale(slice_64, full_0, float("0"), True) + del slice_64 + + # pd_op.transpose: (64x16x32x49xf32) <- (64x16x49x32xf32) + transpose_123 = paddle._C_ops.transpose(slice_65, [0, 1, 3, 2]) + del slice_65 + + # pd_op.matmul: (64x16x49x49xf32) <- (64x16x49x32xf32, 64x16x32x49xf32) + matmul_103 = paddle._C_ops.matmul(scale_20, transpose_123, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_178 = paddle._C_ops.reshape(data_13, full_int_array_15) + del data_13 + + # pd_op.index_select: (2401x16xf32) <- (169x16xf32, 2401xi64) + index_select_20 = paddle._C_ops.index_select(data_14, reshape_178, 0) + del data_14 + + # pd_op.reshape: (49x49x16xf32) <- (2401x16xf32, 3xi64) + reshape_335 = paddle._C_ops.reshape(index_select_20, full_int_array_16) + + # pd_op.transpose: (16x49x49xf32) <- (49x49x16xf32) + transpose_124 = paddle._C_ops.transpose(reshape_335, [2, 0, 1]) + del reshape_335 + + # pd_op.unsqueeze: (1x16x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_30 = paddle._C_ops.unsqueeze(transpose_124, full_int_array_0) + + # pd_op.add: (64x16x49x49xf32) <- (64x16x49x49xf32, 1x16x49x49xf32) + add_218 = paddle._C_ops.add(matmul_103, unsqueeze_30) + + # pd_op.softmax: (64x16x49x49xf32) <- (64x16x49x49xf32) + softmax_20 = paddle._C_ops.softmax(add_218, -1) + del add_218 + + # pd_op.matmul: (64x16x49x32xf32) <- (64x16x49x49xf32, 64x16x49x32xf32) + matmul_144 = paddle._C_ops.matmul(softmax_20, slice_20, False, False) + + # pd_op.transpose: (64x49x16x32xf32) <- (64x16x49x32xf32) + transpose_125 = paddle._C_ops.transpose(matmul_144, [0, 2, 1, 3]) + del matmul_144 + + # pd_op.reshape: (64x49x512xf32) <- (64x49x16x32xf32, 3xi64) + reshape_179 = paddle._C_ops.reshape(transpose_125, full_int_array_60) + + # pd_op.matmul: (64x49x512xf32) <- (64x49x512xf32, 512x512xf32) + matmul_104 = paddle._C_ops.matmul(reshape_179, parameter_50, False, False) + del parameter_50 + + # pd_op.add: (64x49x512xf32) <- (64x49x512xf32, 512xf32) + add_142 = paddle._C_ops.add(matmul_104, parameter_49) + del parameter_49 + + # pd_op.reshape: (64x7x7x512xf32) <- (64x49x512xf32, 4xi64) + reshape_180 = paddle._C_ops.reshape(add_142, full_int_array_57) + + # pd_op.reshape: (16x2x2x7x7x512xf32) <- (64x7x7x512xf32, 6xi64) + reshape_336 = paddle._C_ops.reshape(reshape_180, full_int_array_61) + + # pd_op.transpose: (16x2x7x2x7x512xf32) <- (16x2x2x7x7x512xf32) + transpose_126 = paddle._C_ops.transpose(reshape_336, [0, 1, 3, 2, 4, 5]) + del reshape_336 + + # pd_op.reshape: (16x14x14x512xf32) <- (16x2x7x2x7x512xf32, 4xi64) + reshape_181 = paddle._C_ops.reshape(transpose_126, full_int_array_62) + + # pd_op.reshape: (16x196x512xf32) <- (16x14x14x512xf32, 3xi64) + reshape_182 = paddle._C_ops.reshape(reshape_181, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_21 = paddle._C_ops.full( + [], + float("0.565217"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_222 = full_21 + + # pd_op.uniform: (16x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_38 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (16x1x1xf32) <- (xf32, 16x1x1xf32) + add_219 = paddle._C_ops.add(full_21, uniform_38) + del uniform_38 + + # pd_op.floor: (16x1x1xf32) <- (16x1x1xf32) + floor_38 = paddle._C_ops.floor(add_219) + del add_219 + + # pd_op.divide: (16x196x512xf32) <- (16x196x512xf32, xf32) + divide_38 = paddle._C_ops.divide(reshape_182, full_21) + + # pd_op.multiply: (16x196x512xf32) <- (16x196x512xf32, 16x1x1xf32) + multiply_38 = paddle._C_ops.multiply(divide_38, floor_38) + + # pd_op.add: (16x196x512xf32) <- (16x196x512xf32, 16x196x512xf32) + add_143 = paddle._C_ops.add(add_140, multiply_38) + + # pd_op.layer_norm: (16x196x512xf32, 16x196xf32, 16x196xf32) <- (16x196x512xf32, 512xf32, 512xf32) + layer_norm_132, layer_norm_133, layer_norm_134 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_143, parameter_48, parameter_47, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_47, parameter_48 + + # pd_op.matmul: (16x196x2048xf32) <- (16x196x512xf32, 512x2048xf32) + matmul_105 = paddle._C_ops.matmul(layer_norm_132, parameter_46, False, False) + del parameter_46 + + # pd_op.add: (16x196x2048xf32) <- (16x196x2048xf32, 2048xf32) + add_144 = paddle._C_ops.add(matmul_105, parameter_45) + del parameter_45 + + # pd_op.gelu: (16x196x2048xf32) <- (16x196x2048xf32) + gelu_20 = paddle._C_ops.gelu(add_144, False) + + # pd_op.matmul: (16x196x512xf32) <- (16x196x2048xf32, 2048x512xf32) + matmul_106 = paddle._C_ops.matmul(gelu_20, parameter_44, False, False) + del parameter_44 + + # pd_op.add: (16x196x512xf32) <- (16x196x512xf32, 512xf32) + add_145 = paddle._C_ops.add(matmul_106, parameter_43) + del parameter_43 + + # pd_op.uniform: (16x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_39 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (16x1x1xf32) <- (xf32, 16x1x1xf32) + add_220 = paddle._C_ops.add(full_21, uniform_39) + del uniform_39 + + # pd_op.floor: (16x1x1xf32) <- (16x1x1xf32) + floor_39 = paddle._C_ops.floor(add_220) + del add_220 + + # pd_op.divide: (16x196x512xf32) <- (16x196x512xf32, xf32) + divide_39 = paddle._C_ops.divide(add_145, full_21) + + # pd_op.multiply: (16x196x512xf32) <- (16x196x512xf32, 16x1x1xf32) + multiply_39 = paddle._C_ops.multiply(divide_39, floor_39) + + # pd_op.add: (16x196x512xf32) <- (16x196x512xf32, 16x196x512xf32) + add_146 = paddle._C_ops.add(add_143, multiply_39) + + # pd_op.layer_norm: (16x196x512xf32, 16x196xf32, 16x196xf32) <- (16x196x512xf32, 512xf32, 512xf32) + layer_norm_135, layer_norm_136, layer_norm_137 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_146, parameter_42, parameter_41, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_41, parameter_42 + + # pd_op.reshape: (16x14x14x512xf32) <- (16x196x512xf32, 4xi64) + reshape_183 = paddle._C_ops.reshape(layer_norm_135, full_int_array_55) + + # pd_op.roll: (16x14x14x512xf32) <- (16x14x14x512xf32, 2xi64) + roll_20 = paddle._C_ops.roll(reshape_183, full_int_array_4, [1, 2]) + + # pd_op.reshape: (16x2x7x2x7x512xf32) <- (16x14x14x512xf32, 6xi64) + reshape_337 = paddle._C_ops.reshape(roll_20, full_int_array_56) + del full_int_array_56 + + # pd_op.transpose: (16x2x2x7x7x512xf32) <- (16x2x7x2x7x512xf32) + transpose_127 = paddle._C_ops.transpose(reshape_337, [0, 1, 3, 2, 4, 5]) + del reshape_337 + + # pd_op.reshape: (64x7x7x512xf32) <- (16x2x2x7x7x512xf32, 4xi64) + reshape_184 = paddle._C_ops.reshape(transpose_127, full_int_array_57) + + # pd_op.reshape: (64x49x512xf32) <- (64x7x7x512xf32, 3xi64) + reshape_185 = paddle._C_ops.reshape(reshape_184, full_int_array_58) + del full_int_array_58 + + # pd_op.full: (1x14x14x1xf32) <- () + full_44 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__92 = paddle._C_ops.set_value_( + full_44, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_44 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__93 = paddle._C_ops.set_value_( + set_value__92, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__92 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__94 = paddle._C_ops.set_value_( + set_value__93, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__93 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__95 = paddle._C_ops.set_value_( + set_value__94, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__94 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__96 = paddle._C_ops.set_value_( + set_value__95, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__95 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__97 = paddle._C_ops.set_value_( + set_value__96, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__96 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__98 = paddle._C_ops.set_value_( + set_value__97, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__97 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__99 = paddle._C_ops.set_value_( + set_value__98, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__98 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__10 = paddle._C_ops.set_value_( + set_value__99, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__99 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_338 = paddle._C_ops.reshape(set_value__10, full_int_array_64) + del full_int_array_64 + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_156 = paddle._C_ops.transpose(reshape_338, [0, 1, 3, 2, 4, 5]) + del reshape_338 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_339 = paddle._C_ops.reshape(transpose_156, full_int_array_36) + del transpose_156 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_340 = paddle._C_ops.reshape(reshape_339, full_int_array_37) + del reshape_339 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_67 = paddle._C_ops.unsqueeze(reshape_340, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_68 = paddle._C_ops.unsqueeze(reshape_340, full_int_array_2) + del reshape_340 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_10 = paddle._C_ops.subtract(unsqueeze_67, unsqueeze_68) + del unsqueeze_67, unsqueeze_68 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_10 = paddle._C_ops.not_equal(subtract_10, full_26) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_20 = paddle._C_ops.where(not_equal_10, full_35, subtract_10) + del full_35, not_equal_10, subtract_10 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_10 = paddle._C_ops.equal(where_20, full_26) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_21 = paddle._C_ops.where(equal_10, full_36, where_20) + del equal_10, full_36, where_20 + + # pd_op.matmul: (64x49x1536xf32) <- (64x49x512xf32, 512x1536xf32) + matmul_107 = paddle._C_ops.matmul(reshape_185, parameter_40, False, False) + del parameter_40 + + # pd_op.add: (64x49x1536xf32) <- (64x49x1536xf32, 1536xf32) + add_147 = paddle._C_ops.add(matmul_107, parameter_39) + del parameter_39 + + # pd_op.reshape: (64x49x3x16x32xf32) <- (64x49x1536xf32, 5xi64) + reshape_341 = paddle._C_ops.reshape(add_147, full_int_array_59) + del full_int_array_59 + + # pd_op.transpose: (3x64x16x49x32xf32) <- (64x49x3x16x32xf32) + transpose_128 = paddle._C_ops.transpose(reshape_341, [2, 0, 3, 1, 4]) + del reshape_341 + + # pd_op.slice: (64x16x49x32xf32) <- (3x64x16x49x32xf32, 1xi64, 1xi64) + slice_66 = paddle._C_ops.slice( + transpose_128, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (64x16x49x32xf32) <- (3x64x16x49x32xf32, 1xi64, 1xi64) + slice_67 = paddle._C_ops.slice( + transpose_128, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (64x16x49x32xf32) <- (3x64x16x49x32xf32, 1xi64, 1xi64) + slice_21 = paddle._C_ops.slice( + transpose_128, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (64x16x49x32xf32) <- (64x16x49x32xf32, 1xf32) + scale_21 = paddle._C_ops.scale(slice_66, full_0, float("0"), True) + del slice_66 + + # pd_op.transpose: (64x16x32x49xf32) <- (64x16x49x32xf32) + transpose_129 = paddle._C_ops.transpose(slice_67, [0, 1, 3, 2]) + del slice_67 + + # pd_op.matmul: (64x16x49x49xf32) <- (64x16x49x32xf32, 64x16x32x49xf32) + matmul_108 = paddle._C_ops.matmul(scale_21, transpose_129, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_186 = paddle._C_ops.reshape(data_15, full_int_array_15) + del data_15 + + # pd_op.index_select: (2401x16xf32) <- (169x16xf32, 2401xi64) + index_select_21 = paddle._C_ops.index_select(data_16, reshape_186, 0) + del data_16 + + # pd_op.reshape: (49x49x16xf32) <- (2401x16xf32, 3xi64) + reshape_342 = paddle._C_ops.reshape(index_select_21, full_int_array_16) + + # pd_op.transpose: (16x49x49xf32) <- (49x49x16xf32) + transpose_130 = paddle._C_ops.transpose(reshape_342, [2, 0, 1]) + del reshape_342 + + # pd_op.unsqueeze: (1x16x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_31 = paddle._C_ops.unsqueeze(transpose_130, full_int_array_0) + + # pd_op.add: (64x16x49x49xf32) <- (64x16x49x49xf32, 1x16x49x49xf32) + add_148 = paddle._C_ops.add(matmul_108, unsqueeze_31) + + # pd_op.reshape: (16x4x16x49x49xf32) <- (64x16x49x49xf32, 5xi64) + reshape_187 = paddle._C_ops.reshape(add_148, full_int_array_65) + del full_int_array_65 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_69 = paddle._C_ops.unsqueeze(where_21, full_int_array_1) + del where_21 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_32 = paddle._C_ops.unsqueeze(unsqueeze_69, full_int_array_0) + del unsqueeze_69 + + # pd_op.add: (16x4x16x49x49xf32) <- (16x4x16x49x49xf32, 1x4x1x49x49xf32) + add_149 = paddle._C_ops.add(reshape_187, unsqueeze_32) + + # pd_op.reshape: (64x16x49x49xf32) <- (16x4x16x49x49xf32, 4xi64) + reshape_343 = paddle._C_ops.reshape(add_149, full_int_array_66) + del full_int_array_66 + + # pd_op.softmax: (64x16x49x49xf32) <- (64x16x49x49xf32) + softmax_21 = paddle._C_ops.softmax(reshape_343, -1) + del reshape_343 + + # pd_op.matmul: (64x16x49x32xf32) <- (64x16x49x49xf32, 64x16x49x32xf32) + matmul_145 = paddle._C_ops.matmul(softmax_21, slice_21, False, False) + + # pd_op.transpose: (64x49x16x32xf32) <- (64x16x49x32xf32) + transpose_131 = paddle._C_ops.transpose(matmul_145, [0, 2, 1, 3]) + del matmul_145 + + # pd_op.reshape: (64x49x512xf32) <- (64x49x16x32xf32, 3xi64) + reshape_188 = paddle._C_ops.reshape(transpose_131, full_int_array_60) + del full_int_array_60 + + # pd_op.matmul: (64x49x512xf32) <- (64x49x512xf32, 512x512xf32) + matmul_109 = paddle._C_ops.matmul(reshape_188, parameter_38, False, False) + del parameter_38 + + # pd_op.add: (64x49x512xf32) <- (64x49x512xf32, 512xf32) + add_150 = paddle._C_ops.add(matmul_109, parameter_37) + del parameter_37 + + # pd_op.reshape: (64x7x7x512xf32) <- (64x49x512xf32, 4xi64) + reshape_189 = paddle._C_ops.reshape(add_150, full_int_array_57) + del full_int_array_57 + + # pd_op.reshape: (16x2x2x7x7x512xf32) <- (64x7x7x512xf32, 6xi64) + reshape_344 = paddle._C_ops.reshape(reshape_189, full_int_array_61) + del full_int_array_61 + + # pd_op.transpose: (16x2x7x2x7x512xf32) <- (16x2x2x7x7x512xf32) + transpose_132 = paddle._C_ops.transpose(reshape_344, [0, 1, 3, 2, 4, 5]) + del reshape_344 + + # pd_op.reshape: (16x14x14x512xf32) <- (16x2x7x2x7x512xf32, 4xi64) + reshape_190 = paddle._C_ops.reshape(transpose_132, full_int_array_62) + del full_int_array_62 + + # pd_op.roll: (16x14x14x512xf32) <- (16x14x14x512xf32, 2xi64) + roll_21 = paddle._C_ops.roll(reshape_190, full_int_array_5, [1, 2]) + + # pd_op.reshape: (16x196x512xf32) <- (16x14x14x512xf32, 3xi64) + reshape_191 = paddle._C_ops.reshape(roll_21, full_int_array_63) + del full_int_array_63 + + # pd_op.full: (xf32) <- () + full_22 = paddle._C_ops.full( + [], + float("0.543478"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_233 = full_22 + + # pd_op.uniform: (16x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_40 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (16x1x1xf32) <- (xf32, 16x1x1xf32) + add_221 = paddle._C_ops.add(full_22, uniform_40) + del uniform_40 + + # pd_op.floor: (16x1x1xf32) <- (16x1x1xf32) + floor_40 = paddle._C_ops.floor(add_221) + del add_221 + + # pd_op.divide: (16x196x512xf32) <- (16x196x512xf32, xf32) + divide_40 = paddle._C_ops.divide(reshape_191, full_22) + + # pd_op.multiply: (16x196x512xf32) <- (16x196x512xf32, 16x1x1xf32) + multiply_40 = paddle._C_ops.multiply(divide_40, floor_40) + + # pd_op.add: (16x196x512xf32) <- (16x196x512xf32, 16x196x512xf32) + add_151 = paddle._C_ops.add(add_146, multiply_40) + + # pd_op.layer_norm: (16x196x512xf32, 16x196xf32, 16x196xf32) <- (16x196x512xf32, 512xf32, 512xf32) + layer_norm_138, layer_norm_139, layer_norm_140 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_151, parameter_36, parameter_35, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_35, parameter_36 + + # pd_op.matmul: (16x196x2048xf32) <- (16x196x512xf32, 512x2048xf32) + matmul_110 = paddle._C_ops.matmul(layer_norm_138, parameter_34, False, False) + del parameter_34 + + # pd_op.add: (16x196x2048xf32) <- (16x196x2048xf32, 2048xf32) + add_152 = paddle._C_ops.add(matmul_110, parameter_33) + del parameter_33 + + # pd_op.gelu: (16x196x2048xf32) <- (16x196x2048xf32) + gelu_21 = paddle._C_ops.gelu(add_152, False) + + # pd_op.matmul: (16x196x512xf32) <- (16x196x2048xf32, 2048x512xf32) + matmul_111 = paddle._C_ops.matmul(gelu_21, parameter_32, False, False) + del parameter_32 + + # pd_op.add: (16x196x512xf32) <- (16x196x512xf32, 512xf32) + add_153 = paddle._C_ops.add(matmul_111, parameter_31) + del parameter_31 + + # pd_op.uniform: (16x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_41 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (16x1x1xf32) <- (xf32, 16x1x1xf32) + add_222 = paddle._C_ops.add(full_22, uniform_41) + del uniform_41 + + # pd_op.floor: (16x1x1xf32) <- (16x1x1xf32) + floor_41 = paddle._C_ops.floor(add_222) + del add_222 + + # pd_op.divide: (16x196x512xf32) <- (16x196x512xf32, xf32) + divide_41 = paddle._C_ops.divide(add_153, full_22) + + # pd_op.multiply: (16x196x512xf32) <- (16x196x512xf32, 16x1x1xf32) + multiply_41 = paddle._C_ops.multiply(divide_41, floor_41) + + # pd_op.add: (16x196x512xf32) <- (16x196x512xf32, 16x196x512xf32) + add_154 = paddle._C_ops.add(add_151, multiply_41) + + # pd_op.reshape: (16x14x14x512xf32) <- (16x196x512xf32, 4xi64) + reshape_192 = paddle._C_ops.reshape(add_154, full_int_array_55) + + # pd_op.strided_slice: (16x7x7x512xf32) <- (16x14x14x512xf32, 2xi64, 2xi64, 2xi64) + strided_slice_8 = paddle._C_ops.strided_slice( + reshape_192, [1, 2], full_int_array_21, full_int_array_34, full_int_array_6 + ) + + # pd_op.strided_slice: (16x7x7x512xf32) <- (16x14x14x512xf32, 2xi64, 2xi64, 2xi64) + strided_slice_9 = paddle._C_ops.strided_slice( + reshape_192, [1, 2], full_int_array_7, full_int_array_34, full_int_array_6 + ) + + # pd_op.strided_slice: (16x7x7x512xf32) <- (16x14x14x512xf32, 2xi64, 2xi64, 2xi64) + strided_slice_10 = paddle._C_ops.strided_slice( + reshape_192, [1, 2], full_int_array_8, full_int_array_34, full_int_array_6 + ) + + # pd_op.strided_slice: (16x7x7x512xf32) <- (16x14x14x512xf32, 2xi64, 2xi64, 2xi64) + strided_slice_11 = paddle._C_ops.strided_slice( + reshape_192, [1, 2], full_int_array_23, full_int_array_34, full_int_array_6 + ) + + # pd_op.reshape: (16x14x14x512xf32) <- (16x14x14x512xf32, 4xi64) + reshape_345 = paddle._C_ops.reshape(reshape_192, full_int_array_55) + del full_int_array_55 + + # builtin.combine: ([16x7x7x512xf32, 16x7x7x512xf32, 16x7x7x512xf32, 16x7x7x512xf32]) <- (16x7x7x512xf32, 16x7x7x512xf32, 16x7x7x512xf32, 16x7x7x512xf32) + combine_2 = [ + strided_slice_8, + strided_slice_9, + strided_slice_10, + strided_slice_11, + ] + + # pd_op.concat: (16x7x7x2048xf32) <- ([16x7x7x512xf32, 16x7x7x512xf32, 16x7x7x512xf32, 16x7x7x512xf32], 1xi32) + concat_2 = paddle._C_ops.concat(combine_2, full_2) + del combine_2 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_67 = [16, -1, 2048] + + # pd_op.reshape: (16x49x2048xf32) <- (16x7x7x2048xf32, 3xi64) + reshape_193 = paddle._C_ops.reshape(concat_2, full_int_array_67) + del full_int_array_67 + + # pd_op.layer_norm: (16x49x2048xf32, 16x49xf32, 16x49xf32) <- (16x49x2048xf32, 2048xf32, 2048xf32) + layer_norm_141, layer_norm_142, layer_norm_143 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + reshape_193, parameter_30, parameter_29, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_29, parameter_30 + + # pd_op.matmul: (16x49x1024xf32) <- (16x49x2048xf32, 2048x1024xf32) + matmul_112 = paddle._C_ops.matmul(layer_norm_141, parameter_28, False, False) + del parameter_28 + + # pd_op.layer_norm: (16x49x1024xf32, 16x49xf32, 16x49xf32) <- (16x49x1024xf32, 1024xf32, 1024xf32) + layer_norm_144, layer_norm_145, layer_norm_146 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + matmul_112, parameter_27, parameter_26, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_26, parameter_27 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_68 = [16, 7, 7, 1024] + + # pd_op.reshape: (16x7x7x1024xf32) <- (16x49x1024xf32, 4xi64) + reshape_194 = paddle._C_ops.reshape(layer_norm_144, full_int_array_68) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_69 = [16, 1, 7, 1, 7, 1024] + + # pd_op.reshape: (16x1x7x1x7x1024xf32) <- (16x7x7x1024xf32, 6xi64) + reshape_346 = paddle._C_ops.reshape(reshape_194, full_int_array_69) + + # pd_op.transpose: (16x1x1x7x7x1024xf32) <- (16x1x7x1x7x1024xf32) + transpose_133 = paddle._C_ops.transpose(reshape_346, [0, 1, 3, 2, 4, 5]) + del reshape_346 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_70 = [-1, 7, 7, 1024] + + # pd_op.reshape: (16x7x7x1024xf32) <- (16x1x1x7x7x1024xf32, 4xi64) + reshape_195 = paddle._C_ops.reshape(transpose_133, full_int_array_70) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_71 = [-1, 49, 1024] + + # pd_op.reshape: (16x49x1024xf32) <- (16x7x7x1024xf32, 3xi64) + reshape_196 = paddle._C_ops.reshape(reshape_195, full_int_array_71) + + # pd_op.matmul: (16x49x3072xf32) <- (16x49x1024xf32, 1024x3072xf32) + matmul_113 = paddle._C_ops.matmul(reshape_196, parameter_25, False, False) + del parameter_25 + + # pd_op.add: (16x49x3072xf32) <- (16x49x3072xf32, 3072xf32) + add_155 = paddle._C_ops.add(matmul_113, parameter_24) + del parameter_24 + + # pd_op.full_int_array: (5xi64) <- () + full_int_array_72 = [16, 49, 3, 32, 32] + + # pd_op.reshape: (16x49x3x32x32xf32) <- (16x49x3072xf32, 5xi64) + reshape_347 = paddle._C_ops.reshape(add_155, full_int_array_72) + + # pd_op.transpose: (3x16x32x49x32xf32) <- (16x49x3x32x32xf32) + transpose_134 = paddle._C_ops.transpose(reshape_347, [2, 0, 3, 1, 4]) + del reshape_347 + + # pd_op.slice: (16x32x49x32xf32) <- (3x16x32x49x32xf32, 1xi64, 1xi64) + slice_68 = paddle._C_ops.slice( + transpose_134, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (16x32x49x32xf32) <- (3x16x32x49x32xf32, 1xi64, 1xi64) + slice_69 = paddle._C_ops.slice( + transpose_134, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (16x32x49x32xf32) <- (3x16x32x49x32xf32, 1xi64, 1xi64) + slice_22 = paddle._C_ops.slice( + transpose_134, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (16x32x49x32xf32) <- (16x32x49x32xf32, 1xf32) + scale_22 = paddle._C_ops.scale(slice_68, full_0, float("0"), True) + del slice_68 + + # pd_op.transpose: (16x32x32x49xf32) <- (16x32x49x32xf32) + transpose_135 = paddle._C_ops.transpose(slice_69, [0, 1, 3, 2]) + del slice_69 + + # pd_op.matmul: (16x32x49x49xf32) <- (16x32x49x32xf32, 16x32x32x49xf32) + matmul_114 = paddle._C_ops.matmul(scale_22, transpose_135, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_197 = paddle._C_ops.reshape(data_18, full_int_array_15) + del data_18 + + # pd_op.index_select: (2401x32xf32) <- (169x32xf32, 2401xi64) + index_select_22 = paddle._C_ops.index_select(data_19, reshape_197, 0) + del data_19 + + # pd_op.reshape: (49x49x32xf32) <- (2401x32xf32, 3xi64) + reshape_348 = paddle._C_ops.reshape(index_select_22, full_int_array_16) + + # pd_op.transpose: (32x49x49xf32) <- (49x49x32xf32) + transpose_136 = paddle._C_ops.transpose(reshape_348, [2, 0, 1]) + del reshape_348 + + # pd_op.unsqueeze: (1x32x49x49xf32) <- (32x49x49xf32, 1xi64) + unsqueeze_33 = paddle._C_ops.unsqueeze(transpose_136, full_int_array_0) + + # pd_op.add: (16x32x49x49xf32) <- (16x32x49x49xf32, 1x32x49x49xf32) + add_223 = paddle._C_ops.add(matmul_114, unsqueeze_33) + + # pd_op.softmax: (16x32x49x49xf32) <- (16x32x49x49xf32) + softmax_22 = paddle._C_ops.softmax(add_223, -1) + del add_223 + + # pd_op.matmul: (16x32x49x32xf32) <- (16x32x49x49xf32, 16x32x49x32xf32) + matmul_146 = paddle._C_ops.matmul(softmax_22, slice_22, False, False) + + # pd_op.transpose: (16x49x32x32xf32) <- (16x32x49x32xf32) + transpose_137 = paddle._C_ops.transpose(matmul_146, [0, 2, 1, 3]) + del matmul_146 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_73 = [16, 49, 1024] + + # pd_op.reshape: (16x49x1024xf32) <- (16x49x32x32xf32, 3xi64) + reshape_198 = paddle._C_ops.reshape(transpose_137, full_int_array_73) + + # pd_op.matmul: (16x49x1024xf32) <- (16x49x1024xf32, 1024x1024xf32) + matmul_115 = paddle._C_ops.matmul(reshape_198, parameter_23, False, False) + del parameter_23 + + # pd_op.add: (16x49x1024xf32) <- (16x49x1024xf32, 1024xf32) + add_156 = paddle._C_ops.add(matmul_115, parameter_22) + del parameter_22 + + # pd_op.reshape: (16x7x7x1024xf32) <- (16x49x1024xf32, 4xi64) + reshape_199 = paddle._C_ops.reshape(add_156, full_int_array_70) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_74 = [-1, 1, 1, 7, 7, 1024] + + # pd_op.reshape: (16x1x1x7x7x1024xf32) <- (16x7x7x1024xf32, 6xi64) + reshape_349 = paddle._C_ops.reshape(reshape_199, full_int_array_74) + + # pd_op.transpose: (16x1x7x1x7x1024xf32) <- (16x1x1x7x7x1024xf32) + transpose_138 = paddle._C_ops.transpose(reshape_349, [0, 1, 3, 2, 4, 5]) + del reshape_349 + + # pd_op.reshape: (16x7x7x1024xf32) <- (16x1x7x1x7x1024xf32, 4xi64) + reshape_200 = paddle._C_ops.reshape(transpose_138, full_int_array_70) + + # pd_op.reshape: (16x49x1024xf32) <- (16x7x7x1024xf32, 3xi64) + reshape_201 = paddle._C_ops.reshape(reshape_200, full_int_array_73) + + # pd_op.full: (xf32) <- () + full_23 = paddle._C_ops.full( + [], + float("0.521739"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_255 = full_23 + + # pd_op.uniform: (16x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_42 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (16x1x1xf32) <- (xf32, 16x1x1xf32) + add_224 = paddle._C_ops.add(full_23, uniform_42) + del uniform_42 + + # pd_op.floor: (16x1x1xf32) <- (16x1x1xf32) + floor_42 = paddle._C_ops.floor(add_224) + del add_224 + + # pd_op.divide: (16x49x1024xf32) <- (16x49x1024xf32, xf32) + divide_42 = paddle._C_ops.divide(reshape_201, full_23) + + # pd_op.multiply: (16x49x1024xf32) <- (16x49x1024xf32, 16x1x1xf32) + multiply_42 = paddle._C_ops.multiply(divide_42, floor_42) + + # pd_op.add: (16x49x1024xf32) <- (16x49x1024xf32, 16x49x1024xf32) + add_157 = paddle._C_ops.add(matmul_112, multiply_42) + + # pd_op.layer_norm: (16x49x1024xf32, 16x49xf32, 16x49xf32) <- (16x49x1024xf32, 1024xf32, 1024xf32) + layer_norm_147, layer_norm_148, layer_norm_149 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_157, parameter_21, parameter_20, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_20, parameter_21 + + # pd_op.matmul: (16x49x4096xf32) <- (16x49x1024xf32, 1024x4096xf32) + matmul_116 = paddle._C_ops.matmul(layer_norm_147, parameter_19, False, False) + del parameter_19 + + # pd_op.add: (16x49x4096xf32) <- (16x49x4096xf32, 4096xf32) + add_158 = paddle._C_ops.add(matmul_116, parameter_18) + del parameter_18 + + # pd_op.gelu: (16x49x4096xf32) <- (16x49x4096xf32) + gelu_22 = paddle._C_ops.gelu(add_158, False) + + # pd_op.matmul: (16x49x1024xf32) <- (16x49x4096xf32, 4096x1024xf32) + matmul_117 = paddle._C_ops.matmul(gelu_22, parameter_17, False, False) + del parameter_17 + + # pd_op.add: (16x49x1024xf32) <- (16x49x1024xf32, 1024xf32) + add_159 = paddle._C_ops.add(matmul_117, parameter_16) + del parameter_16 + + # pd_op.uniform: (16x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_43 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (16x1x1xf32) <- (xf32, 16x1x1xf32) + add_225 = paddle._C_ops.add(full_23, uniform_43) + del uniform_43 + + # pd_op.floor: (16x1x1xf32) <- (16x1x1xf32) + floor_43 = paddle._C_ops.floor(add_225) + del add_225 + + # pd_op.divide: (16x49x1024xf32) <- (16x49x1024xf32, xf32) + divide_43 = paddle._C_ops.divide(add_159, full_23) + + # pd_op.multiply: (16x49x1024xf32) <- (16x49x1024xf32, 16x1x1xf32) + multiply_43 = paddle._C_ops.multiply(divide_43, floor_43) + + # pd_op.add: (16x49x1024xf32) <- (16x49x1024xf32, 16x49x1024xf32) + add_160 = paddle._C_ops.add(add_157, multiply_43) + + # pd_op.layer_norm: (16x49x1024xf32, 16x49xf32, 16x49xf32) <- (16x49x1024xf32, 1024xf32, 1024xf32) + layer_norm_150, layer_norm_151, layer_norm_152 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_160, parameter_15, parameter_14, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_14, parameter_15 + + # pd_op.reshape: (16x7x7x1024xf32) <- (16x49x1024xf32, 4xi64) + reshape_202 = paddle._C_ops.reshape(layer_norm_150, full_int_array_68) + del full_int_array_68 + + # pd_op.roll: (16x7x7x1024xf32) <- (16x7x7x1024xf32, 2xi64) + roll_22 = paddle._C_ops.roll(reshape_202, full_int_array_4, [1, 2]) + + # pd_op.reshape: (16x1x7x1x7x1024xf32) <- (16x7x7x1024xf32, 6xi64) + reshape_350 = paddle._C_ops.reshape(roll_22, full_int_array_69) + del full_int_array_69 + + # pd_op.transpose: (16x1x1x7x7x1024xf32) <- (16x1x7x1x7x1024xf32) + transpose_139 = paddle._C_ops.transpose(reshape_350, [0, 1, 3, 2, 4, 5]) + del reshape_350 + + # pd_op.reshape: (16x7x7x1024xf32) <- (16x1x1x7x7x1024xf32, 4xi64) + reshape_203 = paddle._C_ops.reshape(transpose_139, full_int_array_70) + + # pd_op.reshape: (16x49x1024xf32) <- (16x7x7x1024xf32, 3xi64) + reshape_204 = paddle._C_ops.reshape(reshape_203, full_int_array_71) + del full_int_array_71 + + # pd_op.full: (1x7x7x1xf32) <- () + full_45 = paddle._C_ops.full( + [1, 7, 7, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__100 = paddle._C_ops.set_value_( + full_45, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_45, full_int_array_21 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__101 = paddle._C_ops.set_value_( + set_value__100, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del full_int_array_24, set_value__100 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__102 = paddle._C_ops.set_value_( + set_value__101, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del full_int_array_26, full_int_array_27, set_value__101 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__103 = paddle._C_ops.set_value_( + set_value__102, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del full_int_array_28, set_value__102 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__104 = paddle._C_ops.set_value_( + set_value__103, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del full_int_array_22, set_value__103 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__105 = paddle._C_ops.set_value_( + set_value__104, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del full_int_array_25, full_int_array_30, set_value__104 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__106 = paddle._C_ops.set_value_( + set_value__105, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del full_int_array_31, full_int_array_32, set_value__105 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__107 = paddle._C_ops.set_value_( + set_value__106, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del full_int_array_29, full_int_array_33, set_value__106 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__11 = paddle._C_ops.set_value_( + set_value__107, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del full_int_array_34, set_value__107 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_75 = [1, 1, 7, 1, 7, 1] + + # pd_op.reshape: (1x1x7x1x7x1xf32) <- (1x7x7x1xf32, 6xi64) + reshape_351 = paddle._C_ops.reshape(set_value__11, full_int_array_75) + del full_int_array_75 + + # pd_op.transpose: (1x1x1x7x7x1xf32) <- (1x1x7x1x7x1xf32) + transpose_157 = paddle._C_ops.transpose(reshape_351, [0, 1, 3, 2, 4, 5]) + del reshape_351 + + # pd_op.reshape: (1x7x7x1xf32) <- (1x1x1x7x7x1xf32, 4xi64) + reshape_352 = paddle._C_ops.reshape(transpose_157, full_int_array_36) + del full_int_array_36, transpose_157 + + # pd_op.reshape: (1x49xf32) <- (1x7x7x1xf32, 2xi64) + reshape_353 = paddle._C_ops.reshape(reshape_352, full_int_array_37) + del full_int_array_37, reshape_352 + + # pd_op.unsqueeze: (1x1x49xf32) <- (1x49xf32, 1xi64) + unsqueeze_70 = paddle._C_ops.unsqueeze(reshape_353, full_int_array_1) + + # pd_op.unsqueeze: (1x49x1xf32) <- (1x49xf32, 1xi64) + unsqueeze_71 = paddle._C_ops.unsqueeze(reshape_353, full_int_array_2) + del reshape_353 + + # pd_op.subtract: (1x49x49xf32) <- (1x1x49xf32, 1x49x1xf32) + subtract_11 = paddle._C_ops.subtract(unsqueeze_70, unsqueeze_71) + del unsqueeze_70, unsqueeze_71 + + # pd_op.not_equal: (1x49x49xb) <- (1x49x49xf32, xf32) + not_equal_11 = paddle._C_ops.not_equal(subtract_11, full_26) + + # pd_op.full: (1x49x49xf32) <- () + full_46 = paddle._C_ops.full( + [1, 49, 49], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (1x49x49xf32) <- (1x49x49xb, 1x49x49xf32, 1x49x49xf32) + where_22 = paddle._C_ops.where(not_equal_11, full_46, subtract_11) + del full_46, not_equal_11, subtract_11 + + # pd_op.equal: (1x49x49xb) <- (1x49x49xf32, xf32) + equal_11 = paddle._C_ops.equal(where_22, full_26) + del full_26 + + # pd_op.full: (1x49x49xf32) <- () + full_47 = paddle._C_ops.full( + [1, 49, 49], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (1x49x49xf32) <- (1x49x49xb, 1x49x49xf32, 1x49x49xf32) + where_23 = paddle._C_ops.where(equal_11, full_47, where_22) + del equal_11, full_47, where_22 + + # pd_op.matmul: (16x49x3072xf32) <- (16x49x1024xf32, 1024x3072xf32) + matmul_118 = paddle._C_ops.matmul(reshape_204, parameter_13, False, False) + del parameter_13 + + # pd_op.add: (16x49x3072xf32) <- (16x49x3072xf32, 3072xf32) + add_161 = paddle._C_ops.add(matmul_118, parameter_12) + del parameter_12 + + # pd_op.reshape: (16x49x3x32x32xf32) <- (16x49x3072xf32, 5xi64) + reshape_354 = paddle._C_ops.reshape(add_161, full_int_array_72) + del full_int_array_72 + + # pd_op.transpose: (3x16x32x49x32xf32) <- (16x49x3x32x32xf32) + transpose_140 = paddle._C_ops.transpose(reshape_354, [2, 0, 3, 1, 4]) + del reshape_354 + + # pd_op.slice: (16x32x49x32xf32) <- (3x16x32x49x32xf32, 1xi64, 1xi64) + slice_70 = paddle._C_ops.slice( + transpose_140, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (16x32x49x32xf32) <- (3x16x32x49x32xf32, 1xi64, 1xi64) + slice_71 = paddle._C_ops.slice( + transpose_140, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (16x32x49x32xf32) <- (3x16x32x49x32xf32, 1xi64, 1xi64) + slice_23 = paddle._C_ops.slice( + transpose_140, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (16x32x49x32xf32) <- (16x32x49x32xf32, 1xf32) + scale_23 = paddle._C_ops.scale(slice_70, full_0, float("0"), True) + del slice_70 + + # pd_op.transpose: (16x32x32x49xf32) <- (16x32x49x32xf32) + transpose_141 = paddle._C_ops.transpose(slice_71, [0, 1, 3, 2]) + del slice_71 + + # pd_op.matmul: (16x32x49x49xf32) <- (16x32x49x32xf32, 16x32x32x49xf32) + matmul_119 = paddle._C_ops.matmul(scale_23, transpose_141, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_205 = paddle._C_ops.reshape(data_21, full_int_array_15) + del data_21, full_int_array_15 + + # pd_op.index_select: (2401x32xf32) <- (169x32xf32, 2401xi64) + index_select_23 = paddle._C_ops.index_select(data_22, reshape_205, 0) + del data_22 + + # pd_op.reshape: (49x49x32xf32) <- (2401x32xf32, 3xi64) + reshape_355 = paddle._C_ops.reshape(index_select_23, full_int_array_16) + del full_int_array_16 + + # pd_op.transpose: (32x49x49xf32) <- (49x49x32xf32) + transpose_142 = paddle._C_ops.transpose(reshape_355, [2, 0, 1]) + del reshape_355 + + # pd_op.unsqueeze: (1x32x49x49xf32) <- (32x49x49xf32, 1xi64) + unsqueeze_34 = paddle._C_ops.unsqueeze(transpose_142, full_int_array_0) + + # pd_op.add: (16x32x49x49xf32) <- (16x32x49x49xf32, 1x32x49x49xf32) + add_162 = paddle._C_ops.add(matmul_119, unsqueeze_34) + + # pd_op.full_int_array: (5xi64) <- () + full_int_array_76 = [16, 1, 32, 49, 49] + + # pd_op.reshape: (16x1x32x49x49xf32) <- (16x32x49x49xf32, 5xi64) + reshape_206 = paddle._C_ops.reshape(add_162, full_int_array_76) + del full_int_array_76 + + # pd_op.unsqueeze: (1x1x49x49xf32) <- (1x49x49xf32, 1xi64) + unsqueeze_72 = paddle._C_ops.unsqueeze(where_23, full_int_array_1) + del where_23 + + # pd_op.unsqueeze: (1x1x1x49x49xf32) <- (1x1x49x49xf32, 1xi64) + unsqueeze_35 = paddle._C_ops.unsqueeze(unsqueeze_72, full_int_array_0) + del unsqueeze_72 + + # pd_op.add: (16x1x32x49x49xf32) <- (16x1x32x49x49xf32, 1x1x1x49x49xf32) + add_163 = paddle._C_ops.add(reshape_206, unsqueeze_35) + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_77 = [16, 32, 49, 49] + + # pd_op.reshape: (16x32x49x49xf32) <- (16x1x32x49x49xf32, 4xi64) + reshape_356 = paddle._C_ops.reshape(add_163, full_int_array_77) + del full_int_array_77 + + # pd_op.softmax: (16x32x49x49xf32) <- (16x32x49x49xf32) + softmax_23 = paddle._C_ops.softmax(reshape_356, -1) + del reshape_356 + + # pd_op.matmul: (16x32x49x32xf32) <- (16x32x49x49xf32, 16x32x49x32xf32) + matmul_147 = paddle._C_ops.matmul(softmax_23, slice_23, False, False) + + # pd_op.transpose: (16x49x32x32xf32) <- (16x32x49x32xf32) + transpose_143 = paddle._C_ops.transpose(matmul_147, [0, 2, 1, 3]) + del matmul_147 + + # pd_op.reshape: (16x49x1024xf32) <- (16x49x32x32xf32, 3xi64) + reshape_207 = paddle._C_ops.reshape(transpose_143, full_int_array_73) + + # pd_op.matmul: (16x49x1024xf32) <- (16x49x1024xf32, 1024x1024xf32) + matmul_120 = paddle._C_ops.matmul(reshape_207, parameter_11, False, False) + del parameter_11 + + # pd_op.add: (16x49x1024xf32) <- (16x49x1024xf32, 1024xf32) + add_164 = paddle._C_ops.add(matmul_120, parameter_10) + del parameter_10 + + # pd_op.reshape: (16x7x7x1024xf32) <- (16x49x1024xf32, 4xi64) + reshape_208 = paddle._C_ops.reshape(add_164, full_int_array_70) + + # pd_op.reshape: (16x1x1x7x7x1024xf32) <- (16x7x7x1024xf32, 6xi64) + reshape_357 = paddle._C_ops.reshape(reshape_208, full_int_array_74) + del full_int_array_74 + + # pd_op.transpose: (16x1x7x1x7x1024xf32) <- (16x1x1x7x7x1024xf32) + transpose_144 = paddle._C_ops.transpose(reshape_357, [0, 1, 3, 2, 4, 5]) + del reshape_357 + + # pd_op.reshape: (16x7x7x1024xf32) <- (16x1x7x1x7x1024xf32, 4xi64) + reshape_209 = paddle._C_ops.reshape(transpose_144, full_int_array_70) + del full_int_array_70 + + # pd_op.roll: (16x7x7x1024xf32) <- (16x7x7x1024xf32, 2xi64) + roll_23 = paddle._C_ops.roll(reshape_209, full_int_array_5, [1, 2]) + + # pd_op.reshape: (16x49x1024xf32) <- (16x7x7x1024xf32, 3xi64) + reshape_210 = paddle._C_ops.reshape(roll_23, full_int_array_73) + del full_int_array_73 + + # pd_op.full: (xf32) <- () + full_24 = paddle._C_ops.full( + [], float("0.5"), paddle.float32, paddle.framework._current_expected_place() + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_266 = full_24 + + # pd_op.uniform: (16x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_44 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (16x1x1xf32) <- (xf32, 16x1x1xf32) + add_226 = paddle._C_ops.add(full_24, uniform_44) + del uniform_44 + + # pd_op.floor: (16x1x1xf32) <- (16x1x1xf32) + floor_44 = paddle._C_ops.floor(add_226) + del add_226 + + # pd_op.divide: (16x49x1024xf32) <- (16x49x1024xf32, xf32) + divide_44 = paddle._C_ops.divide(reshape_210, full_24) + + # pd_op.multiply: (16x49x1024xf32) <- (16x49x1024xf32, 16x1x1xf32) + multiply_44 = paddle._C_ops.multiply(divide_44, floor_44) + + # pd_op.add: (16x49x1024xf32) <- (16x49x1024xf32, 16x49x1024xf32) + add_165 = paddle._C_ops.add(add_160, multiply_44) + + # pd_op.layer_norm: (16x49x1024xf32, 16x49xf32, 16x49xf32) <- (16x49x1024xf32, 1024xf32, 1024xf32) + layer_norm_153, layer_norm_154, layer_norm_155 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_165, parameter_9, parameter_8, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_8, parameter_9 + + # pd_op.matmul: (16x49x4096xf32) <- (16x49x1024xf32, 1024x4096xf32) + matmul_121 = paddle._C_ops.matmul(layer_norm_153, parameter_7, False, False) + del parameter_7 + + # pd_op.add: (16x49x4096xf32) <- (16x49x4096xf32, 4096xf32) + add_166 = paddle._C_ops.add(matmul_121, parameter_6) + del parameter_6 + + # pd_op.gelu: (16x49x4096xf32) <- (16x49x4096xf32) + gelu_23 = paddle._C_ops.gelu(add_166, False) + + # pd_op.matmul: (16x49x1024xf32) <- (16x49x4096xf32, 4096x1024xf32) + matmul_122 = paddle._C_ops.matmul(gelu_23, parameter_5, False, False) + del parameter_5 + + # pd_op.add: (16x49x1024xf32) <- (16x49x1024xf32, 1024xf32) + add_167 = paddle._C_ops.add(matmul_122, parameter_4) + del parameter_4 + + # pd_op.uniform: (16x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_45 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + del full_29, full_30, full_int_array_40 + + # pd_op.add: (16x1x1xf32) <- (xf32, 16x1x1xf32) + add_227 = paddle._C_ops.add(full_24, uniform_45) + del uniform_45 + + # pd_op.floor: (16x1x1xf32) <- (16x1x1xf32) + floor_45 = paddle._C_ops.floor(add_227) + del add_227 + + # pd_op.divide: (16x49x1024xf32) <- (16x49x1024xf32, xf32) + divide_45 = paddle._C_ops.divide(add_167, full_24) + + # pd_op.multiply: (16x49x1024xf32) <- (16x49x1024xf32, 16x1x1xf32) + multiply_45 = paddle._C_ops.multiply(divide_45, floor_45) + + # pd_op.add: (16x49x1024xf32) <- (16x49x1024xf32, 16x49x1024xf32) + add_168 = paddle._C_ops.add(add_165, multiply_45) + + # pd_op.layer_norm: (16x49x1024xf32, 16x49xf32, 16x49xf32) <- (16x49x1024xf32, 1024xf32, 1024xf32) + layer_norm_158, layer_norm_156, layer_norm_157 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_168, parameter_3, parameter_2, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_2, parameter_3 + + # pd_op.transpose: (16x1024x49xf32) <- (16x49x1024xf32) + transpose_145 = paddle._C_ops.transpose(layer_norm_158, [0, 2, 1]) + del layer_norm_158 + + # pd_op.unsqueeze: (16x1024x1x49xf32) <- (16x1024x49xf32, 1xi64) + unsqueeze_36 = paddle._C_ops.unsqueeze(transpose_145, full_int_array_2) + + # pd_op.pool2d: (16x1024x1x1xf32) <- (16x1024x1x49xf32, 2xi64) + pool2d_0 = paddle._C_ops.pool2d( + unsqueeze_36, + full_int_array_23, + [1, 1], + [0, 0], + False, + True, + "NCHW", + "avg", + False, + True, + "EXPLICIT", + ) + del full_int_array_23 + + # pd_op.squeeze: (16x1024x1xf32) <- (16x1024x1x1xf32, 1xi64) + squeeze_0 = paddle._C_ops.squeeze(pool2d_0, full_int_array_2) + + # pd_op.flatten: (16x1024xf32) <- (16x1024x1xf32) + flatten_0 = paddle._C_ops.flatten(squeeze_0, 1, 2) + + # pd_op.matmul: (16x102xf32) <- (16x1024xf32, 1024x102xf32) + matmul_123 = paddle._C_ops.matmul(flatten_0, parameter_1, False, False) + del parameter_1 + + # pd_op.add: (16x102xf32) <- (16x102xf32, 102xf32) + add_169 = paddle._C_ops.add(matmul_123, parameter_0) + del ( + assign_0, + assign_1, + assign_10, + assign_101, + assign_103, + assign_104, + assign_105, + assign_106, + assign_107, + assign_108, + assign_109, + assign_111, + assign_112, + assign_114, + assign_115, + assign_116, + assign_117, + assign_118, + assign_119, + assign_12, + assign_121, + assign_123, + assign_124, + assign_125, + assign_126, + assign_127, + assign_128, + assign_129, + assign_13, + assign_131, + assign_132, + assign_134, + assign_135, + assign_136, + assign_137, + assign_138, + assign_139, + assign_14, + assign_141, + assign_143, + assign_144, + assign_145, + assign_146, + assign_147, + assign_148, + assign_149, + assign_15, + assign_151, + assign_152, + assign_154, + assign_155, + assign_156, + assign_157, + assign_158, + assign_159, + assign_16, + assign_161, + assign_163, + assign_164, + assign_165, + assign_166, + assign_167, + assign_168, + assign_169, + assign_17, + assign_171, + assign_172, + assign_174, + assign_175, + assign_176, + assign_177, + assign_178, + assign_179, + assign_18, + assign_181, + assign_183, + assign_184, + assign_185, + assign_186, + assign_187, + assign_188, + assign_189, + assign_19, + assign_191, + assign_192, + assign_194, + assign_195, + assign_196, + assign_197, + assign_198, + assign_199, + assign_2, + assign_20, + assign_201, + assign_203, + assign_204, + assign_205, + assign_206, + assign_207, + assign_208, + assign_209, + assign_21, + assign_211, + assign_212, + assign_214, + assign_215, + assign_216, + assign_217, + assign_218, + assign_219, + assign_22, + assign_221, + assign_223, + assign_224, + assign_225, + assign_226, + assign_227, + assign_228, + assign_229, + assign_23, + assign_231, + assign_232, + assign_234, + assign_235, + assign_236, + assign_237, + assign_238, + assign_239, + assign_24, + assign_240, + assign_241, + assign_242, + assign_243, + assign_244, + assign_245, + assign_247, + assign_248, + assign_249, + assign_25, + assign_250, + assign_251, + assign_252, + assign_254, + assign_256, + assign_257, + assign_258, + assign_259, + assign_26, + assign_260, + assign_261, + assign_262, + assign_264, + assign_265, + assign_267, + assign_268, + assign_269, + assign_28, + assign_3, + assign_30, + assign_31, + assign_32, + assign_33, + assign_34, + assign_35, + assign_36, + assign_38, + assign_39, + assign_4, + assign_41, + assign_42, + assign_43, + assign_44, + assign_45, + assign_46, + assign_47, + assign_48, + assign_49, + assign_5, + assign_50, + assign_51, + assign_52, + assign_54, + assign_55, + assign_56, + assign_57, + assign_58, + assign_59, + assign_6, + assign_61, + assign_63, + assign_64, + assign_65, + assign_66, + assign_67, + assign_68, + assign_69, + assign_7, + assign_71, + assign_72, + assign_74, + assign_75, + assign_76, + assign_77, + assign_78, + assign_79, + assign_8, + assign_81, + assign_83, + assign_84, + assign_85, + assign_86, + assign_87, + assign_88, + assign_89, + assign_91, + assign_92, + assign_94, + assign_95, + assign_96, + assign_97, + assign_98, + assign_99, + full_int_array_0, + full_int_array_1, + full_int_array_2, + full_int_array_3, + full_int_array_4, + full_int_array_5, + full_int_array_6, + full_int_array_7, + full_int_array_8, + parameter_0, + ) + + return ( + conv2d_0, + reshape_0, + add_0, + transpose_0, + layer_norm_0, + layer_norm_1, + layer_norm_2, + layer_norm_3, + layer_norm_4, + layer_norm_5, + reshape_1, + transpose_1, + reshape_2, + reshape_3, + matmul_0, + add_1, + transpose_2, + slice_0, + full_0, + scale_0, + transpose_3, + matmul_1, + reshape_4, + index_select_0, + transpose_4, + unsqueeze_0, + softmax_0, + transpose_5, + reshape_5, + matmul_2, + add_2, + reshape_6, + transpose_6, + reshape_7, + reshape_8, + add_3, + layer_norm_6, + layer_norm_7, + layer_norm_8, + matmul_3, + add_4, + gelu_0, + matmul_4, + add_5, + add_6, + layer_norm_9, + layer_norm_10, + layer_norm_11, + reshape_9, + roll_0, + transpose_7, + reshape_10, + reshape_11, + matmul_5, + add_7, + transpose_8, + slice_1, + assign_9, + scale_1, + transpose_9, + matmul_6, + reshape_12, + index_select_1, + transpose_10, + unsqueeze_1, + add_8, + reshape_13, + unsqueeze_2, + add_9, + softmax_1, + transpose_11, + reshape_14, + matmul_7, + add_10, + reshape_15, + transpose_12, + reshape_16, + roll_1, + reshape_17, + full_1, + floor_0, + divide_0, + multiply_0, + add_11, + layer_norm_12, + layer_norm_13, + layer_norm_14, + matmul_8, + add_12, + gelu_1, + matmul_9, + add_13, + assign_11, + floor_1, + divide_1, + multiply_1, + add_14, + reshape_18, + strided_slice_0, + strided_slice_1, + strided_slice_2, + strided_slice_3, + full_2, + concat_0, + reshape_19, + layer_norm_15, + layer_norm_16, + layer_norm_17, + matmul_10, + layer_norm_18, + layer_norm_19, + layer_norm_20, + reshape_20, + transpose_13, + reshape_21, + reshape_22, + matmul_11, + add_15, + transpose_14, + slice_2, + assign_27, + scale_2, + transpose_15, + matmul_12, + reshape_23, + index_select_2, + transpose_16, + unsqueeze_3, + softmax_2, + transpose_17, + reshape_24, + matmul_13, + add_16, + reshape_25, + transpose_18, + reshape_26, + reshape_27, + full_3, + floor_2, + divide_2, + multiply_2, + add_17, + layer_norm_21, + layer_norm_22, + layer_norm_23, + matmul_14, + add_18, + gelu_2, + matmul_15, + add_19, + assign_29, + floor_3, + divide_3, + multiply_3, + add_20, + layer_norm_24, + layer_norm_25, + layer_norm_26, + reshape_28, + roll_2, + transpose_19, + reshape_29, + reshape_30, + matmul_16, + add_21, + transpose_20, + slice_3, + assign_37, + scale_3, + transpose_21, + matmul_17, + reshape_31, + index_select_3, + transpose_22, + unsqueeze_4, + add_22, + reshape_32, + unsqueeze_5, + add_23, + softmax_3, + transpose_23, + reshape_33, + matmul_18, + add_24, + reshape_34, + transpose_24, + reshape_35, + roll_3, + reshape_36, + full_4, + floor_4, + divide_4, + multiply_4, + add_25, + layer_norm_27, + layer_norm_28, + layer_norm_29, + matmul_19, + add_26, + gelu_3, + matmul_20, + add_27, + assign_40, + floor_5, + divide_5, + multiply_5, + add_28, + reshape_37, + strided_slice_4, + strided_slice_5, + strided_slice_6, + strided_slice_7, + assign_53, + concat_1, + reshape_38, + layer_norm_30, + layer_norm_31, + layer_norm_32, + matmul_21, + layer_norm_33, + layer_norm_34, + layer_norm_35, + reshape_39, + transpose_25, + reshape_40, + reshape_41, + matmul_22, + add_29, + transpose_26, + slice_4, + assign_60, + scale_4, + transpose_27, + matmul_23, + reshape_42, + index_select_4, + transpose_28, + unsqueeze_6, + softmax_4, + transpose_29, + reshape_43, + matmul_24, + add_30, + reshape_44, + transpose_30, + reshape_45, + reshape_46, + full_5, + floor_6, + divide_6, + multiply_6, + add_31, + layer_norm_36, + layer_norm_37, + layer_norm_38, + matmul_25, + add_32, + gelu_4, + matmul_26, + add_33, + assign_62, + floor_7, + divide_7, + multiply_7, + add_34, + layer_norm_39, + layer_norm_40, + layer_norm_41, + reshape_47, + roll_4, + transpose_31, + reshape_48, + reshape_49, + matmul_27, + add_35, + transpose_32, + slice_5, + assign_70, + scale_5, + transpose_33, + matmul_28, + reshape_50, + index_select_5, + transpose_34, + unsqueeze_7, + add_36, + reshape_51, + unsqueeze_8, + add_37, + softmax_5, + transpose_35, + reshape_52, + matmul_29, + add_38, + reshape_53, + transpose_36, + reshape_54, + roll_5, + reshape_55, + full_6, + floor_8, + divide_8, + multiply_8, + add_39, + layer_norm_42, + layer_norm_43, + layer_norm_44, + matmul_30, + add_40, + gelu_5, + matmul_31, + add_41, + assign_73, + floor_9, + divide_9, + multiply_9, + add_42, + layer_norm_45, + layer_norm_46, + layer_norm_47, + reshape_56, + transpose_37, + reshape_57, + reshape_58, + matmul_32, + add_43, + transpose_38, + slice_6, + assign_80, + scale_6, + transpose_39, + matmul_33, + reshape_59, + index_select_6, + transpose_40, + unsqueeze_9, + softmax_6, + transpose_41, + reshape_60, + matmul_34, + add_44, + reshape_61, + transpose_42, + reshape_62, + reshape_63, + full_7, + floor_10, + divide_10, + multiply_10, + add_45, + layer_norm_48, + layer_norm_49, + layer_norm_50, + matmul_35, + add_46, + gelu_6, + matmul_36, + add_47, + assign_82, + floor_11, + divide_11, + multiply_11, + add_48, + layer_norm_51, + layer_norm_52, + layer_norm_53, + reshape_64, + roll_6, + transpose_43, + reshape_65, + reshape_66, + matmul_37, + add_49, + transpose_44, + slice_7, + assign_90, + scale_7, + transpose_45, + matmul_38, + reshape_67, + index_select_7, + transpose_46, + unsqueeze_10, + add_50, + reshape_68, + unsqueeze_11, + add_51, + softmax_7, + transpose_47, + reshape_69, + matmul_39, + add_52, + reshape_70, + transpose_48, + reshape_71, + roll_7, + reshape_72, + full_8, + floor_12, + divide_12, + multiply_12, + add_53, + layer_norm_54, + layer_norm_55, + layer_norm_56, + matmul_40, + add_54, + gelu_7, + matmul_41, + add_55, + assign_93, + floor_13, + divide_13, + multiply_13, + add_56, + layer_norm_57, + layer_norm_58, + layer_norm_59, + reshape_73, + transpose_49, + reshape_74, + reshape_75, + matmul_42, + add_57, + transpose_50, + slice_8, + assign_100, + scale_8, + transpose_51, + matmul_43, + reshape_76, + index_select_8, + transpose_52, + unsqueeze_12, + softmax_8, + transpose_53, + reshape_77, + matmul_44, + add_58, + reshape_78, + transpose_54, + reshape_79, + reshape_80, + full_9, + floor_14, + divide_14, + multiply_14, + add_59, + layer_norm_60, + layer_norm_61, + layer_norm_62, + matmul_45, + add_60, + gelu_8, + matmul_46, + add_61, + assign_102, + floor_15, + divide_15, + multiply_15, + add_62, + layer_norm_63, + layer_norm_64, + layer_norm_65, + reshape_81, + roll_8, + transpose_55, + reshape_82, + reshape_83, + matmul_47, + add_63, + transpose_56, + slice_9, + assign_110, + scale_9, + transpose_57, + matmul_48, + reshape_84, + index_select_9, + transpose_58, + unsqueeze_13, + add_64, + reshape_85, + unsqueeze_14, + add_65, + softmax_9, + transpose_59, + reshape_86, + matmul_49, + add_66, + reshape_87, + transpose_60, + reshape_88, + roll_9, + reshape_89, + full_10, + floor_16, + divide_16, + multiply_16, + add_67, + layer_norm_66, + layer_norm_67, + layer_norm_68, + matmul_50, + add_68, + gelu_9, + matmul_51, + add_69, + assign_113, + floor_17, + divide_17, + multiply_17, + add_70, + layer_norm_69, + layer_norm_70, + layer_norm_71, + reshape_90, + transpose_61, + reshape_91, + reshape_92, + matmul_52, + add_71, + transpose_62, + slice_10, + assign_120, + scale_10, + transpose_63, + matmul_53, + reshape_93, + index_select_10, + transpose_64, + unsqueeze_15, + softmax_10, + transpose_65, + reshape_94, + matmul_54, + add_72, + reshape_95, + transpose_66, + reshape_96, + reshape_97, + full_11, + floor_18, + divide_18, + multiply_18, + add_73, + layer_norm_72, + layer_norm_73, + layer_norm_74, + matmul_55, + add_74, + gelu_10, + matmul_56, + add_75, + assign_122, + floor_19, + divide_19, + multiply_19, + add_76, + layer_norm_75, + layer_norm_76, + layer_norm_77, + reshape_98, + roll_10, + transpose_67, + reshape_99, + reshape_100, + matmul_57, + add_77, + transpose_68, + slice_11, + assign_130, + scale_11, + transpose_69, + matmul_58, + reshape_101, + index_select_11, + transpose_70, + unsqueeze_16, + add_78, + reshape_102, + unsqueeze_17, + add_79, + softmax_11, + transpose_71, + reshape_103, + matmul_59, + add_80, + reshape_104, + transpose_72, + reshape_105, + roll_11, + reshape_106, + full_12, + floor_20, + divide_20, + multiply_20, + add_81, + layer_norm_78, + layer_norm_79, + layer_norm_80, + matmul_60, + add_82, + gelu_11, + matmul_61, + add_83, + assign_133, + floor_21, + divide_21, + multiply_21, + add_84, + layer_norm_81, + layer_norm_82, + layer_norm_83, + reshape_107, + transpose_73, + reshape_108, + reshape_109, + matmul_62, + add_85, + transpose_74, + slice_12, + assign_140, + scale_12, + transpose_75, + matmul_63, + reshape_110, + index_select_12, + transpose_76, + unsqueeze_18, + softmax_12, + transpose_77, + reshape_111, + matmul_64, + add_86, + reshape_112, + transpose_78, + reshape_113, + reshape_114, + full_13, + floor_22, + divide_22, + multiply_22, + add_87, + layer_norm_84, + layer_norm_85, + layer_norm_86, + matmul_65, + add_88, + gelu_12, + matmul_66, + add_89, + assign_142, + floor_23, + divide_23, + multiply_23, + add_90, + layer_norm_87, + layer_norm_88, + layer_norm_89, + reshape_115, + roll_12, + transpose_79, + reshape_116, + reshape_117, + matmul_67, + add_91, + transpose_80, + slice_13, + assign_150, + scale_13, + transpose_81, + matmul_68, + reshape_118, + index_select_13, + transpose_82, + unsqueeze_19, + add_92, + reshape_119, + unsqueeze_20, + add_93, + softmax_13, + transpose_83, + reshape_120, + matmul_69, + add_94, + reshape_121, + transpose_84, + reshape_122, + roll_13, + reshape_123, + full_14, + floor_24, + divide_24, + multiply_24, + add_95, + layer_norm_90, + layer_norm_91, + layer_norm_92, + matmul_70, + add_96, + gelu_13, + matmul_71, + add_97, + assign_153, + floor_25, + divide_25, + multiply_25, + add_98, + layer_norm_93, + layer_norm_94, + layer_norm_95, + reshape_124, + transpose_85, + reshape_125, + reshape_126, + matmul_72, + add_99, + transpose_86, + slice_14, + assign_160, + scale_14, + transpose_87, + matmul_73, + reshape_127, + index_select_14, + transpose_88, + unsqueeze_21, + softmax_14, + transpose_89, + reshape_128, + matmul_74, + add_100, + reshape_129, + transpose_90, + reshape_130, + reshape_131, + full_15, + floor_26, + divide_26, + multiply_26, + add_101, + layer_norm_96, + layer_norm_97, + layer_norm_98, + matmul_75, + add_102, + gelu_14, + matmul_76, + add_103, + assign_162, + floor_27, + divide_27, + multiply_27, + add_104, + layer_norm_99, + layer_norm_100, + layer_norm_101, + reshape_132, + roll_14, + transpose_91, + reshape_133, + reshape_134, + matmul_77, + add_105, + transpose_92, + slice_15, + assign_170, + scale_15, + transpose_93, + matmul_78, + reshape_135, + index_select_15, + transpose_94, + unsqueeze_22, + add_106, + reshape_136, + unsqueeze_23, + add_107, + softmax_15, + transpose_95, + reshape_137, + matmul_79, + add_108, + reshape_138, + transpose_96, + reshape_139, + roll_15, + reshape_140, + full_16, + floor_28, + divide_28, + multiply_28, + add_109, + layer_norm_102, + layer_norm_103, + layer_norm_104, + matmul_80, + add_110, + gelu_15, + matmul_81, + add_111, + assign_173, + floor_29, + divide_29, + multiply_29, + add_112, + layer_norm_105, + layer_norm_106, + layer_norm_107, + reshape_141, + transpose_97, + reshape_142, + reshape_143, + matmul_82, + add_113, + transpose_98, + slice_16, + assign_180, + scale_16, + transpose_99, + matmul_83, + reshape_144, + index_select_16, + transpose_100, + unsqueeze_24, + softmax_16, + transpose_101, + reshape_145, + matmul_84, + add_114, + reshape_146, + transpose_102, + reshape_147, + reshape_148, + full_17, + floor_30, + divide_30, + multiply_30, + add_115, + layer_norm_108, + layer_norm_109, + layer_norm_110, + matmul_85, + add_116, + gelu_16, + matmul_86, + add_117, + assign_182, + floor_31, + divide_31, + multiply_31, + add_118, + layer_norm_111, + layer_norm_112, + layer_norm_113, + reshape_149, + roll_16, + transpose_103, + reshape_150, + reshape_151, + matmul_87, + add_119, + transpose_104, + slice_17, + assign_190, + scale_17, + transpose_105, + matmul_88, + reshape_152, + index_select_17, + transpose_106, + unsqueeze_25, + add_120, + reshape_153, + unsqueeze_26, + add_121, + softmax_17, + transpose_107, + reshape_154, + matmul_89, + add_122, + reshape_155, + transpose_108, + reshape_156, + roll_17, + reshape_157, + full_18, + floor_32, + divide_32, + multiply_32, + add_123, + layer_norm_114, + layer_norm_115, + layer_norm_116, + matmul_90, + add_124, + gelu_17, + matmul_91, + add_125, + assign_193, + floor_33, + divide_33, + multiply_33, + add_126, + layer_norm_117, + layer_norm_118, + layer_norm_119, + reshape_158, + transpose_109, + reshape_159, + reshape_160, + matmul_92, + add_127, + transpose_110, + slice_18, + assign_200, + scale_18, + transpose_111, + matmul_93, + reshape_161, + index_select_18, + transpose_112, + unsqueeze_27, + softmax_18, + transpose_113, + reshape_162, + matmul_94, + add_128, + reshape_163, + transpose_114, + reshape_164, + reshape_165, + full_19, + floor_34, + divide_34, + multiply_34, + add_129, + layer_norm_120, + layer_norm_121, + layer_norm_122, + matmul_95, + add_130, + gelu_18, + matmul_96, + add_131, + assign_202, + floor_35, + divide_35, + multiply_35, + add_132, + layer_norm_123, + layer_norm_124, + layer_norm_125, + reshape_166, + roll_18, + transpose_115, + reshape_167, + reshape_168, + matmul_97, + add_133, + transpose_116, + slice_19, + assign_210, + scale_19, + transpose_117, + matmul_98, + reshape_169, + index_select_19, + transpose_118, + unsqueeze_28, + add_134, + reshape_170, + unsqueeze_29, + add_135, + softmax_19, + transpose_119, + reshape_171, + matmul_99, + add_136, + reshape_172, + transpose_120, + reshape_173, + roll_19, + reshape_174, + full_20, + floor_36, + divide_36, + multiply_36, + add_137, + layer_norm_126, + layer_norm_127, + layer_norm_128, + matmul_100, + add_138, + gelu_19, + matmul_101, + add_139, + assign_213, + floor_37, + divide_37, + multiply_37, + add_140, + layer_norm_129, + layer_norm_130, + layer_norm_131, + reshape_175, + transpose_121, + reshape_176, + reshape_177, + matmul_102, + add_141, + transpose_122, + slice_20, + assign_220, + scale_20, + transpose_123, + matmul_103, + reshape_178, + index_select_20, + transpose_124, + unsqueeze_30, + softmax_20, + transpose_125, + reshape_179, + matmul_104, + add_142, + reshape_180, + transpose_126, + reshape_181, + reshape_182, + full_21, + floor_38, + divide_38, + multiply_38, + add_143, + layer_norm_132, + layer_norm_133, + layer_norm_134, + matmul_105, + add_144, + gelu_20, + matmul_106, + add_145, + assign_222, + floor_39, + divide_39, + multiply_39, + add_146, + layer_norm_135, + layer_norm_136, + layer_norm_137, + reshape_183, + roll_20, + transpose_127, + reshape_184, + reshape_185, + matmul_107, + add_147, + transpose_128, + slice_21, + assign_230, + scale_21, + transpose_129, + matmul_108, + reshape_186, + index_select_21, + transpose_130, + unsqueeze_31, + add_148, + reshape_187, + unsqueeze_32, + add_149, + softmax_21, + transpose_131, + reshape_188, + matmul_109, + add_150, + reshape_189, + transpose_132, + reshape_190, + roll_21, + reshape_191, + full_22, + floor_40, + divide_40, + multiply_40, + add_151, + layer_norm_138, + layer_norm_139, + layer_norm_140, + matmul_110, + add_152, + gelu_21, + matmul_111, + add_153, + assign_233, + floor_41, + divide_41, + multiply_41, + add_154, + reshape_192, + strided_slice_8, + strided_slice_9, + strided_slice_10, + strided_slice_11, + assign_246, + concat_2, + reshape_193, + layer_norm_141, + layer_norm_142, + layer_norm_143, + matmul_112, + layer_norm_144, + layer_norm_145, + layer_norm_146, + reshape_194, + transpose_133, + reshape_195, + reshape_196, + matmul_113, + add_155, + transpose_134, + slice_22, + assign_253, + scale_22, + transpose_135, + matmul_114, + reshape_197, + index_select_22, + transpose_136, + unsqueeze_33, + softmax_22, + transpose_137, + reshape_198, + matmul_115, + add_156, + reshape_199, + transpose_138, + reshape_200, + reshape_201, + full_23, + floor_42, + divide_42, + multiply_42, + add_157, + layer_norm_147, + layer_norm_148, + layer_norm_149, + matmul_116, + add_158, + gelu_22, + matmul_117, + add_159, + assign_255, + floor_43, + divide_43, + multiply_43, + add_160, + layer_norm_150, + layer_norm_151, + layer_norm_152, + reshape_202, + roll_22, + transpose_139, + reshape_203, + reshape_204, + matmul_118, + add_161, + transpose_140, + slice_23, + assign_263, + scale_23, + transpose_141, + matmul_119, + reshape_205, + index_select_23, + transpose_142, + unsqueeze_34, + add_162, + reshape_206, + unsqueeze_35, + add_163, + softmax_23, + transpose_143, + reshape_207, + matmul_120, + add_164, + reshape_208, + transpose_144, + reshape_209, + roll_23, + reshape_210, + full_24, + floor_44, + divide_44, + multiply_44, + add_165, + layer_norm_153, + layer_norm_154, + layer_norm_155, + matmul_121, + add_166, + gelu_23, + matmul_122, + add_167, + assign_266, + floor_45, + divide_45, + multiply_45, + add_168, + layer_norm_156, + layer_norm_157, + transpose_145, + unsqueeze_36, + pool2d_0, + squeeze_0, + flatten_0, + matmul_123, + add_169, + set_value__0, + set_value__1, + set_value__2, + set_value__3, + set_value__4, + set_value__5, + set_value__6, + set_value__7, + set_value__8, + set_value__9, + set_value__10, + set_value__11, + ) diff --git a/paddle_samples/vision-model/SwinTransformer_base_patch4_window7_224/subgraph_2/weight_meta.py b/paddle_samples/vision-model/SwinTransformer_base_patch4_window7_224/subgraph_2/weight_meta.py new file mode 100644 index 00000000..69d1deec --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_base_patch4_window7_224/subgraph_2/weight_meta.py @@ -0,0 +1,2743 @@ +class Program_weight_tensor_parameter_0: + name = "parameter_0" + shape = [102] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_1: + name = "parameter_1" + shape = [1024, 102] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_2: + name = "parameter_2" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_3: + name = "parameter_3" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_4: + name = "parameter_4" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_5: + name = "parameter_5" + shape = [4096, 1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_6: + name = "parameter_6" + shape = [4096] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_7: + name = "parameter_7" + shape = [1024, 4096] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_8: + name = "parameter_8" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_9: + name = "parameter_9" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_10: + name = "parameter_10" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_11: + name = "parameter_11" + shape = [1024, 1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_12: + name = "parameter_12" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_13: + name = "parameter_13" + shape = [1024, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_14: + name = "parameter_14" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_15: + name = "parameter_15" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_16: + name = "parameter_16" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_17: + name = "parameter_17" + shape = [4096, 1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_18: + name = "parameter_18" + shape = [4096] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_19: + name = "parameter_19" + shape = [1024, 4096] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_20: + name = "parameter_20" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_21: + name = "parameter_21" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_22: + name = "parameter_22" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_23: + name = "parameter_23" + shape = [1024, 1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_24: + name = "parameter_24" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_25: + name = "parameter_25" + shape = [1024, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_26: + name = "parameter_26" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_27: + name = "parameter_27" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_28: + name = "parameter_28" + shape = [2048, 1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_29: + name = "parameter_29" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_30: + name = "parameter_30" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_31: + name = "parameter_31" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_32: + name = "parameter_32" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_33: + name = "parameter_33" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_34: + name = "parameter_34" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_35: + name = "parameter_35" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_36: + name = "parameter_36" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_37: + name = "parameter_37" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_38: + name = "parameter_38" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_39: + name = "parameter_39" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_40: + name = "parameter_40" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_41: + name = "parameter_41" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_42: + name = "parameter_42" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_43: + name = "parameter_43" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_44: + name = "parameter_44" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_45: + name = "parameter_45" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_46: + name = "parameter_46" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_47: + name = "parameter_47" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_48: + name = "parameter_48" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_49: + name = "parameter_49" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_50: + name = "parameter_50" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_51: + name = "parameter_51" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_52: + name = "parameter_52" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_53: + name = "parameter_53" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_54: + name = "parameter_54" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_55: + name = "parameter_55" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_56: + name = "parameter_56" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_57: + name = "parameter_57" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_58: + name = "parameter_58" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_59: + name = "parameter_59" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_60: + name = "parameter_60" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_61: + name = "parameter_61" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_62: + name = "parameter_62" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_63: + name = "parameter_63" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_64: + name = "parameter_64" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_65: + name = "parameter_65" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_66: + name = "parameter_66" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_67: + name = "parameter_67" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_68: + name = "parameter_68" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_69: + name = "parameter_69" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_70: + name = "parameter_70" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_71: + name = "parameter_71" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_72: + name = "parameter_72" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_73: + name = "parameter_73" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_74: + name = "parameter_74" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_75: + name = "parameter_75" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_76: + name = "parameter_76" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_77: + name = "parameter_77" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_78: + name = "parameter_78" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_79: + name = "parameter_79" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_80: + name = "parameter_80" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_81: + name = "parameter_81" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_82: + name = "parameter_82" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_83: + name = "parameter_83" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_84: + name = "parameter_84" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_85: + name = "parameter_85" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_86: + name = "parameter_86" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_87: + name = "parameter_87" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_88: + name = "parameter_88" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_89: + name = "parameter_89" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_90: + name = "parameter_90" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_91: + name = "parameter_91" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_92: + name = "parameter_92" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_93: + name = "parameter_93" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_94: + name = "parameter_94" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_95: + name = "parameter_95" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_96: + name = "parameter_96" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_97: + name = "parameter_97" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_98: + name = "parameter_98" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_99: + name = "parameter_99" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_100: + name = "parameter_100" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_101: + name = "parameter_101" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_102: + name = "parameter_102" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_103: + name = "parameter_103" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_104: + name = "parameter_104" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_105: + name = "parameter_105" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_106: + name = "parameter_106" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_107: + name = "parameter_107" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_108: + name = "parameter_108" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_109: + name = "parameter_109" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_110: + name = "parameter_110" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_111: + name = "parameter_111" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_112: + name = "parameter_112" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_113: + name = "parameter_113" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_114: + name = "parameter_114" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_115: + name = "parameter_115" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_116: + name = "parameter_116" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_117: + name = "parameter_117" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_118: + name = "parameter_118" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_119: + name = "parameter_119" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_120: + name = "parameter_120" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_121: + name = "parameter_121" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_122: + name = "parameter_122" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_123: + name = "parameter_123" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_124: + name = "parameter_124" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_125: + name = "parameter_125" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_126: + name = "parameter_126" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_127: + name = "parameter_127" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_128: + name = "parameter_128" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_129: + name = "parameter_129" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_130: + name = "parameter_130" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_131: + name = "parameter_131" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_132: + name = "parameter_132" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_133: + name = "parameter_133" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_134: + name = "parameter_134" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_135: + name = "parameter_135" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_136: + name = "parameter_136" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_137: + name = "parameter_137" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_138: + name = "parameter_138" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_139: + name = "parameter_139" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_140: + name = "parameter_140" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_141: + name = "parameter_141" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_142: + name = "parameter_142" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_143: + name = "parameter_143" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_144: + name = "parameter_144" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_145: + name = "parameter_145" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_146: + name = "parameter_146" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_147: + name = "parameter_147" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_148: + name = "parameter_148" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_149: + name = "parameter_149" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_150: + name = "parameter_150" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_151: + name = "parameter_151" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_152: + name = "parameter_152" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_153: + name = "parameter_153" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_154: + name = "parameter_154" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_155: + name = "parameter_155" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_156: + name = "parameter_156" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_157: + name = "parameter_157" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_158: + name = "parameter_158" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_159: + name = "parameter_159" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_160: + name = "parameter_160" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_161: + name = "parameter_161" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_162: + name = "parameter_162" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_163: + name = "parameter_163" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_164: + name = "parameter_164" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_165: + name = "parameter_165" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_166: + name = "parameter_166" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_167: + name = "parameter_167" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_168: + name = "parameter_168" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_169: + name = "parameter_169" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_170: + name = "parameter_170" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_171: + name = "parameter_171" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_172: + name = "parameter_172" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_173: + name = "parameter_173" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_174: + name = "parameter_174" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_175: + name = "parameter_175" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_176: + name = "parameter_176" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_177: + name = "parameter_177" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_178: + name = "parameter_178" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_179: + name = "parameter_179" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_180: + name = "parameter_180" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_181: + name = "parameter_181" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_182: + name = "parameter_182" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_183: + name = "parameter_183" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_184: + name = "parameter_184" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_185: + name = "parameter_185" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_186: + name = "parameter_186" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_187: + name = "parameter_187" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_188: + name = "parameter_188" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_189: + name = "parameter_189" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_190: + name = "parameter_190" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_191: + name = "parameter_191" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_192: + name = "parameter_192" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_193: + name = "parameter_193" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_194: + name = "parameter_194" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_195: + name = "parameter_195" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_196: + name = "parameter_196" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_197: + name = "parameter_197" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_198: + name = "parameter_198" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_199: + name = "parameter_199" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_200: + name = "parameter_200" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_201: + name = "parameter_201" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_202: + name = "parameter_202" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_203: + name = "parameter_203" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_204: + name = "parameter_204" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_205: + name = "parameter_205" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_206: + name = "parameter_206" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_207: + name = "parameter_207" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_208: + name = "parameter_208" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_209: + name = "parameter_209" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_210: + name = "parameter_210" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_211: + name = "parameter_211" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_212: + name = "parameter_212" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_213: + name = "parameter_213" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_214: + name = "parameter_214" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_215: + name = "parameter_215" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_216: + name = "parameter_216" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_217: + name = "parameter_217" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_218: + name = "parameter_218" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_219: + name = "parameter_219" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_220: + name = "parameter_220" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_221: + name = "parameter_221" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_222: + name = "parameter_222" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_223: + name = "parameter_223" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_224: + name = "parameter_224" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_225: + name = "parameter_225" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_226: + name = "parameter_226" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_227: + name = "parameter_227" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_228: + name = "parameter_228" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_229: + name = "parameter_229" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_230: + name = "parameter_230" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_231: + name = "parameter_231" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_232: + name = "parameter_232" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_233: + name = "parameter_233" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_234: + name = "parameter_234" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_235: + name = "parameter_235" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_236: + name = "parameter_236" + shape = [2048, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_237: + name = "parameter_237" + shape = [2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_238: + name = "parameter_238" + shape = [512, 2048] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_239: + name = "parameter_239" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_240: + name = "parameter_240" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_241: + name = "parameter_241" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_242: + name = "parameter_242" + shape = [512, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_243: + name = "parameter_243" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_244: + name = "parameter_244" + shape = [512, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_245: + name = "parameter_245" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_246: + name = "parameter_246" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_247: + name = "parameter_247" + shape = [1024, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_248: + name = "parameter_248" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_249: + name = "parameter_249" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_250: + name = "parameter_250" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_251: + name = "parameter_251" + shape = [1024, 256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_252: + name = "parameter_252" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_253: + name = "parameter_253" + shape = [256, 1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_254: + name = "parameter_254" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_255: + name = "parameter_255" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_256: + name = "parameter_256" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_257: + name = "parameter_257" + shape = [256, 256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_258: + name = "parameter_258" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_259: + name = "parameter_259" + shape = [256, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_260: + name = "parameter_260" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_261: + name = "parameter_261" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_262: + name = "parameter_262" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_263: + name = "parameter_263" + shape = [1024, 256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_264: + name = "parameter_264" + shape = [1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_265: + name = "parameter_265" + shape = [256, 1024] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_266: + name = "parameter_266" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_267: + name = "parameter_267" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_268: + name = "parameter_268" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_269: + name = "parameter_269" + shape = [256, 256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_270: + name = "parameter_270" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_271: + name = "parameter_271" + shape = [256, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_272: + name = "parameter_272" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_273: + name = "parameter_273" + shape = [256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_274: + name = "parameter_274" + shape = [512, 256] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_275: + name = "parameter_275" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_276: + name = "parameter_276" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_277: + name = "parameter_277" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_278: + name = "parameter_278" + shape = [512, 128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_279: + name = "parameter_279" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_280: + name = "parameter_280" + shape = [128, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_281: + name = "parameter_281" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_282: + name = "parameter_282" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_283: + name = "parameter_283" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_284: + name = "parameter_284" + shape = [128, 128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_285: + name = "parameter_285" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_286: + name = "parameter_286" + shape = [128, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_287: + name = "parameter_287" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_288: + name = "parameter_288" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_289: + name = "parameter_289" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_290: + name = "parameter_290" + shape = [512, 128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_291: + name = "parameter_291" + shape = [512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_292: + name = "parameter_292" + shape = [128, 512] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_293: + name = "parameter_293" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_294: + name = "parameter_294" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_295: + name = "parameter_295" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_296: + name = "parameter_296" + shape = [128, 128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_297: + name = "parameter_297" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_298: + name = "parameter_298" + shape = [128, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_299: + name = "parameter_299" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_300: + name = "parameter_300" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_301: + name = "parameter_301" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_302: + name = "parameter_302" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_303: + name = "parameter_303" + shape = [128] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_304: + name = "parameter_304" + shape = [128, 3, 4, 4] + dtype = "float32" + low = 0 + high = 0.5 + data = None diff --git a/paddle_samples/vision-model/SwinTransformer_large_patch4_window12_384/subgraph_0/graph_hash.txt b/paddle_samples/vision-model/SwinTransformer_large_patch4_window12_384/subgraph_0/graph_hash.txt new file mode 100644 index 00000000..678a18a1 --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_large_patch4_window12_384/subgraph_0/graph_hash.txt @@ -0,0 +1 @@ +b30ca9883649d991bc8c4ba04da935a9b4b73a36613bcc8529ef9a0a2fb8b1d3 \ No newline at end of file diff --git a/paddle_samples/vision-model/SwinTransformer_large_patch4_window12_384/subgraph_0/graph_net.json b/paddle_samples/vision-model/SwinTransformer_large_patch4_window12_384/subgraph_0/graph_net.json new file mode 100644 index 00000000..368ac6fc --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_large_patch4_window12_384/subgraph_0/graph_net.json @@ -0,0 +1,5 @@ +{ + "framework": "paddle", + "num_devices_required": 1, + "num_nodes_required": 1 +} \ No newline at end of file diff --git a/paddle_samples/vision-model/SwinTransformer_large_patch4_window12_384/subgraph_0/input_meta.py b/paddle_samples/vision-model/SwinTransformer_large_patch4_window12_384/subgraph_0/input_meta.py new file mode 100644 index 00000000..8b2ba575 --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_large_patch4_window12_384/subgraph_0/input_meta.py @@ -0,0 +1,439 @@ +class Program_weight_tensor_data_0: + name = "data_0" + shape = [4, 3, 384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_1: + name = "data_1" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_2: + name = "data_2" + shape = [529, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_3: + name = "data_3" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_4: + name = "data_4" + shape = [529, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_5: + name = "data_5" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_6: + name = "data_6" + shape = [529, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_7: + name = "data_7" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_8: + name = "data_8" + shape = [529, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_9: + name = "data_9" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_10: + name = "data_10" + shape = [529, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_11: + name = "data_11" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_12: + name = "data_12" + shape = [529, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_13: + name = "data_13" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_14: + name = "data_14" + shape = [529, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_15: + name = "data_15" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_16: + name = "data_16" + shape = [529, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_17: + name = "data_17" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_18: + name = "data_18" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_19: + name = "data_19" + shape = [529, 48] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_20: + name = "data_20" + shape = [529, 12] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_21: + name = "data_21" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_22: + name = "data_22" + shape = [529, 48] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_23: + name = "data_23" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_24: + name = "data_24" + shape = [529, 6] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_25: + name = "data_25" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_26: + name = "data_26" + shape = [529, 12] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_27: + name = "data_27" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_28: + name = "data_28" + shape = [529, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_29: + name = "data_29" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_30: + name = "data_30" + shape = [529, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_31: + name = "data_31" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_32: + name = "data_32" + shape = [529, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_33: + name = "data_33" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_34: + name = "data_34" + shape = [529, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_35: + name = "data_35" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_36: + name = "data_36" + shape = [529, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_37: + name = "data_37" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_38: + name = "data_38" + shape = [529, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_39: + name = "data_39" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_40: + name = "data_40" + shape = [529, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_41: + name = "data_41" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_42: + name = "data_42" + shape = [529, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_43: + name = "data_43" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_44: + name = "data_44" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_45: + name = "data_45" + shape = [529, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_46: + name = "data_46" + shape = [529, 6] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_47: + name = "data_47" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_48: + name = "data_48" + shape = [529, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None diff --git a/paddle_samples/vision-model/SwinTransformer_large_patch4_window12_384/subgraph_0/model.py b/paddle_samples/vision-model/SwinTransformer_large_patch4_window12_384/subgraph_0/model.py new file mode 100644 index 00000000..c4639d90 --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_large_patch4_window12_384/subgraph_0/model.py @@ -0,0 +1,10708 @@ +import paddle + + +class GraphModule(paddle.nn.Layer): + def __init__(self): + super().__init__() + + def forward( + self, + parameter_0, + parameter_1, + parameter_2, + parameter_3, + parameter_4, + parameter_5, + parameter_6, + parameter_7, + parameter_8, + parameter_9, + parameter_10, + parameter_11, + parameter_12, + parameter_13, + parameter_14, + parameter_15, + parameter_16, + parameter_17, + parameter_18, + parameter_19, + parameter_20, + parameter_21, + parameter_22, + parameter_23, + parameter_24, + parameter_25, + parameter_26, + parameter_27, + parameter_28, + parameter_29, + parameter_30, + parameter_31, + parameter_32, + parameter_33, + parameter_34, + parameter_35, + parameter_36, + parameter_37, + parameter_38, + parameter_39, + parameter_40, + parameter_41, + parameter_42, + parameter_43, + parameter_44, + parameter_45, + parameter_46, + parameter_47, + parameter_48, + parameter_49, + parameter_50, + parameter_51, + parameter_52, + parameter_53, + parameter_54, + parameter_55, + parameter_56, + parameter_57, + parameter_58, + parameter_59, + parameter_60, + parameter_61, + parameter_62, + parameter_63, + parameter_64, + parameter_65, + parameter_66, + parameter_67, + parameter_68, + parameter_69, + parameter_70, + parameter_71, + parameter_72, + parameter_73, + parameter_74, + parameter_75, + parameter_76, + parameter_77, + parameter_78, + parameter_79, + parameter_80, + parameter_81, + parameter_82, + parameter_83, + parameter_84, + parameter_85, + parameter_86, + parameter_87, + parameter_88, + parameter_89, + parameter_90, + parameter_91, + parameter_92, + parameter_93, + parameter_94, + parameter_95, + parameter_96, + parameter_97, + parameter_98, + parameter_99, + parameter_100, + parameter_101, + parameter_102, + parameter_103, + parameter_104, + parameter_105, + parameter_106, + parameter_107, + parameter_108, + parameter_109, + parameter_110, + parameter_111, + parameter_112, + parameter_113, + parameter_114, + parameter_115, + parameter_116, + parameter_117, + parameter_118, + parameter_119, + parameter_120, + parameter_121, + parameter_122, + parameter_123, + parameter_124, + parameter_125, + parameter_126, + parameter_127, + parameter_128, + parameter_129, + parameter_130, + parameter_131, + parameter_132, + parameter_133, + parameter_134, + parameter_135, + parameter_136, + parameter_137, + parameter_138, + parameter_139, + parameter_140, + parameter_141, + parameter_142, + parameter_143, + parameter_144, + parameter_145, + parameter_146, + parameter_147, + parameter_148, + parameter_149, + parameter_150, + parameter_151, + parameter_152, + parameter_153, + parameter_154, + parameter_155, + parameter_156, + parameter_157, + parameter_158, + parameter_159, + parameter_160, + parameter_161, + parameter_162, + parameter_163, + parameter_164, + parameter_165, + parameter_166, + parameter_167, + parameter_168, + parameter_169, + parameter_170, + parameter_171, + parameter_172, + parameter_173, + parameter_174, + parameter_175, + parameter_176, + parameter_177, + parameter_178, + parameter_179, + parameter_180, + parameter_181, + parameter_182, + parameter_183, + parameter_184, + parameter_185, + parameter_186, + parameter_187, + parameter_188, + parameter_189, + parameter_190, + parameter_191, + parameter_192, + parameter_193, + parameter_194, + parameter_195, + parameter_196, + parameter_197, + parameter_198, + parameter_199, + parameter_200, + parameter_201, + parameter_202, + parameter_203, + parameter_204, + parameter_205, + parameter_206, + parameter_207, + parameter_208, + parameter_209, + parameter_210, + parameter_211, + parameter_212, + parameter_213, + parameter_214, + parameter_215, + parameter_216, + parameter_217, + parameter_218, + parameter_219, + parameter_220, + parameter_221, + parameter_222, + parameter_223, + parameter_224, + parameter_225, + parameter_226, + parameter_227, + parameter_228, + parameter_229, + parameter_230, + parameter_231, + parameter_232, + parameter_233, + parameter_234, + parameter_235, + parameter_236, + parameter_237, + parameter_238, + parameter_239, + parameter_240, + parameter_241, + parameter_242, + parameter_243, + parameter_244, + parameter_245, + parameter_246, + parameter_247, + parameter_248, + parameter_249, + parameter_250, + parameter_251, + parameter_252, + parameter_253, + parameter_254, + parameter_255, + parameter_256, + parameter_257, + parameter_258, + parameter_259, + parameter_260, + parameter_261, + parameter_262, + parameter_263, + parameter_264, + parameter_265, + parameter_266, + parameter_267, + parameter_268, + parameter_269, + parameter_270, + parameter_271, + parameter_272, + parameter_273, + parameter_274, + parameter_275, + parameter_276, + parameter_277, + parameter_278, + parameter_279, + parameter_280, + parameter_281, + parameter_282, + parameter_283, + parameter_284, + parameter_285, + parameter_286, + parameter_287, + parameter_288, + parameter_289, + parameter_290, + parameter_291, + parameter_292, + parameter_293, + parameter_294, + parameter_295, + parameter_296, + parameter_297, + parameter_298, + parameter_299, + parameter_300, + parameter_301, + parameter_302, + parameter_303, + parameter_304, + data_0, + data_1, + data_2, + data_3, + data_4, + data_5, + data_6, + data_7, + data_8, + data_9, + data_10, + data_11, + data_12, + data_13, + data_14, + data_15, + data_16, + data_17, + data_18, + data_19, + data_20, + data_21, + data_22, + data_23, + data_24, + data_25, + data_26, + data_27, + data_28, + data_29, + data_30, + data_31, + data_32, + data_33, + data_34, + data_35, + data_36, + data_37, + data_38, + data_39, + data_40, + data_41, + data_42, + data_43, + data_44, + data_45, + data_46, + data_47, + data_48, + ): + # pd_op.conv2d: (4x192x96x96xf32) <- (4x3x384x384xf32, 192x3x4x4xf32) + conv2d_0 = paddle._C_ops.conv2d( + data_0, parameter_304, [4, 4], [0, 0], "EXPLICIT", [1, 1], 1, "NCHW" + ) + del data_0, parameter_304 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_9 = [1, -1, 1, 1] + + # pd_op.reshape: (1x192x1x1xf32) <- (192xf32, 4xi64) + reshape_0 = paddle._C_ops.reshape(parameter_303, full_int_array_9) + del full_int_array_9, parameter_303 + + # pd_op.add: (4x192x96x96xf32) <- (4x192x96x96xf32, 1x192x1x1xf32) + add_0 = paddle._C_ops.add(conv2d_0, reshape_0) + + # pd_op.flatten: (4x192x9216xf32) <- (4x192x96x96xf32) + flatten_1 = paddle._C_ops.flatten(add_0, 2, 3) + + # pd_op.transpose: (4x9216x192xf32) <- (4x192x9216xf32) + transpose_0 = paddle._C_ops.transpose(flatten_1, [0, 2, 1]) + del flatten_1 + + # pd_op.layer_norm: (4x9216x192xf32, 4x9216xf32, 4x9216xf32) <- (4x9216x192xf32, 192xf32, 192xf32) + layer_norm_0, layer_norm_1, layer_norm_2 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + transpose_0, parameter_302, parameter_301, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_301, parameter_302 + + # pd_op.layer_norm: (4x9216x192xf32, 4x9216xf32, 4x9216xf32) <- (4x9216x192xf32, 192xf32, 192xf32) + layer_norm_3, layer_norm_4, layer_norm_5 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + layer_norm_0, parameter_300, parameter_299, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_299, parameter_300 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_10 = [4, 96, 96, 192] + + # pd_op.reshape: (4x96x96x192xf32) <- (4x9216x192xf32, 4xi64) + reshape_1 = paddle._C_ops.reshape(layer_norm_3, full_int_array_10) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_11 = [4, 8, 12, 8, 12, 192] + + # pd_op.reshape: (4x8x12x8x12x192xf32) <- (4x96x96x192xf32, 6xi64) + reshape_211 = paddle._C_ops.reshape(reshape_1, full_int_array_11) + + # pd_op.transpose: (4x8x8x12x12x192xf32) <- (4x8x12x8x12x192xf32) + transpose_1 = paddle._C_ops.transpose(reshape_211, [0, 1, 3, 2, 4, 5]) + del reshape_211 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_12 = [-1, 12, 12, 192] + + # pd_op.reshape: (256x12x12x192xf32) <- (4x8x8x12x12x192xf32, 4xi64) + reshape_2 = paddle._C_ops.reshape(transpose_1, full_int_array_12) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_13 = [-1, 144, 192] + + # pd_op.reshape: (256x144x192xf32) <- (256x12x12x192xf32, 3xi64) + reshape_3 = paddle._C_ops.reshape(reshape_2, full_int_array_13) + + # pd_op.matmul: (256x144x576xf32) <- (256x144x192xf32, 192x576xf32) + matmul_0 = paddle._C_ops.matmul(reshape_3, parameter_298, False, False) + del parameter_298 + + # pd_op.add: (256x144x576xf32) <- (256x144x576xf32, 576xf32) + add_1 = paddle._C_ops.add(matmul_0, parameter_297) + del parameter_297 + + # pd_op.full_int_array: (5xi64) <- () + full_int_array_14 = [256, 144, 3, 6, 32] + + # pd_op.reshape: (256x144x3x6x32xf32) <- (256x144x576xf32, 5xi64) + reshape_212 = paddle._C_ops.reshape(add_1, full_int_array_14) + + # pd_op.transpose: (3x256x6x144x32xf32) <- (256x144x3x6x32xf32) + transpose_2 = paddle._C_ops.transpose(reshape_212, [2, 0, 3, 1, 4]) + del reshape_212 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_0 = [0] + + # pd_op.assign: (1xi64) <- (1xi64) + assign_264 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_257 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_254 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_247 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_231 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_224 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_221 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_214 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_211 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_204 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_201 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_194 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_191 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_184 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_181 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_174 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_171 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_164 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_161 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_154 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_151 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_144 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_141 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_134 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_131 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_124 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_121 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_114 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_111 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_104 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_101 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_94 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_91 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_84 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_81 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_74 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_71 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_64 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_61 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_54 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_38 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_31 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_28 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_21 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_10 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_3 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_2 = full_int_array_0 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_1 = [1] + + # pd_op.assign: (1xi64) <- (1xi64) + assign_259 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_258 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_249 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_248 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_226 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_225 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_216 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_215 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_206 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_205 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_196 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_195 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_186 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_185 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_176 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_175 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_166 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_165 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_156 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_155 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_146 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_145 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_136 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_135 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_126 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_125 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_116 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_115 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_106 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_105 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_96 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_95 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_86 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_85 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_76 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_75 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_66 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_65 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_56 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_55 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_33 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_32 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_23 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_22 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_5 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_4 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_0 = full_int_array_1 + + # pd_op.slice: (256x6x144x32xf32) <- (3x256x6x144x32xf32, 1xi64, 1xi64) + slice_24 = paddle._C_ops.slice( + transpose_2, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_2 = [2] + + # pd_op.assign: (1xi64) <- (1xi64) + assign_269 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_267 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_261 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_260 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_251 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_250 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_228 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_227 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_218 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_217 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_208 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_207 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_198 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_197 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_188 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_187 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_178 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_177 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_168 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_167 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_158 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_157 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_148 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_147 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_138 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_137 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_128 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_127 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_118 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_117 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_108 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_107 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_98 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_97 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_88 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_87 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_78 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_77 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_68 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_67 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_58 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_57 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_35 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_34 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_25 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_24 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_7 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_6 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_1 = full_int_array_2 + + # pd_op.slice: (256x6x144x32xf32) <- (3x256x6x144x32xf32, 1xi64, 1xi64) + slice_25 = paddle._C_ops.slice( + transpose_2, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_3 = [3] + + # pd_op.assign: (1xi64) <- (1xi64) + assign_262 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_252 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_229 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_219 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_209 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_199 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_189 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_179 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_169 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_159 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_149 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_139 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_129 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_119 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_109 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_99 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_89 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_79 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_69 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_59 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_36 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_26 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_8 = full_int_array_3 + + # pd_op.slice: (256x6x144x32xf32) <- (3x256x6x144x32xf32, 1xi64, 1xi64) + slice_0 = paddle._C_ops.slice( + transpose_2, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.full: (1xf32) <- () + full_0 = paddle._C_ops.full( + [1], float("0.176777"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.assign: (1xf32) <- (1xf32) + assign_263 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_253 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_230 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_220 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_210 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_200 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_190 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_180 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_170 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_160 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_150 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_140 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_130 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_120 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_110 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_100 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_90 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_80 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_70 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_60 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_37 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_27 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_9 = full_0 + + # pd_op.scale: (256x6x144x32xf32) <- (256x6x144x32xf32, 1xf32) + scale_0 = paddle._C_ops.scale(slice_24, full_0, float("0"), True) + del slice_24 + + # pd_op.transpose: (256x6x32x144xf32) <- (256x6x144x32xf32) + transpose_3 = paddle._C_ops.transpose(slice_25, [0, 1, 3, 2]) + del slice_25 + + # pd_op.matmul: (256x6x144x144xf32) <- (256x6x144x32xf32, 256x6x32x144xf32) + matmul_1 = paddle._C_ops.matmul(scale_0, transpose_3, False, False) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_15 = [-1] + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_4 = paddle._C_ops.reshape(data_23, full_int_array_15) + del data_23 + + # pd_op.index_select: (20736x6xf32) <- (529x6xf32, 20736xi64) + index_select_0 = paddle._C_ops.index_select(data_24, reshape_4, 0) + del data_24 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_16 = [144, 144, -1] + + # pd_op.reshape: (144x144x6xf32) <- (20736x6xf32, 3xi64) + reshape_213 = paddle._C_ops.reshape(index_select_0, full_int_array_16) + + # pd_op.transpose: (6x144x144xf32) <- (144x144x6xf32) + transpose_4 = paddle._C_ops.transpose(reshape_213, [2, 0, 1]) + del reshape_213 + + # pd_op.unsqueeze: (1x6x144x144xf32) <- (6x144x144xf32, 1xi64) + unsqueeze_0 = paddle._C_ops.unsqueeze(transpose_4, full_int_array_0) + + # pd_op.add: (256x6x144x144xf32) <- (256x6x144x144xf32, 1x6x144x144xf32) + add_170 = paddle._C_ops.add(matmul_1, unsqueeze_0) + + # pd_op.softmax: (256x6x144x144xf32) <- (256x6x144x144xf32) + softmax_0 = paddle._C_ops.softmax(add_170, -1) + del add_170 + + # pd_op.matmul: (256x6x144x32xf32) <- (256x6x144x144xf32, 256x6x144x32xf32) + matmul_124 = paddle._C_ops.matmul(softmax_0, slice_0, False, False) + + # pd_op.transpose: (256x144x6x32xf32) <- (256x6x144x32xf32) + transpose_5 = paddle._C_ops.transpose(matmul_124, [0, 2, 1, 3]) + del matmul_124 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_17 = [256, 144, 192] + + # pd_op.reshape: (256x144x192xf32) <- (256x144x6x32xf32, 3xi64) + reshape_5 = paddle._C_ops.reshape(transpose_5, full_int_array_17) + + # pd_op.matmul: (256x144x192xf32) <- (256x144x192xf32, 192x192xf32) + matmul_2 = paddle._C_ops.matmul(reshape_5, parameter_296, False, False) + del parameter_296 + + # pd_op.add: (256x144x192xf32) <- (256x144x192xf32, 192xf32) + add_2 = paddle._C_ops.add(matmul_2, parameter_295) + del parameter_295 + + # pd_op.reshape: (256x12x12x192xf32) <- (256x144x192xf32, 4xi64) + reshape_6 = paddle._C_ops.reshape(add_2, full_int_array_12) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_18 = [-1, 8, 8, 12, 12, 192] + + # pd_op.reshape: (4x8x8x12x12x192xf32) <- (256x12x12x192xf32, 6xi64) + reshape_214 = paddle._C_ops.reshape(reshape_6, full_int_array_18) + + # pd_op.transpose: (4x8x12x8x12x192xf32) <- (4x8x8x12x12x192xf32) + transpose_6 = paddle._C_ops.transpose(reshape_214, [0, 1, 3, 2, 4, 5]) + del reshape_214 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_19 = [-1, 96, 96, 192] + + # pd_op.reshape: (4x96x96x192xf32) <- (4x8x12x8x12x192xf32, 4xi64) + reshape_7 = paddle._C_ops.reshape(transpose_6, full_int_array_19) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_20 = [4, 9216, 192] + + # pd_op.reshape: (4x9216x192xf32) <- (4x96x96x192xf32, 3xi64) + reshape_8 = paddle._C_ops.reshape(reshape_7, full_int_array_20) + + # pd_op.add: (4x9216x192xf32) <- (4x9216x192xf32, 4x9216x192xf32) + add_3 = paddle._C_ops.add(layer_norm_0, reshape_8) + + # pd_op.layer_norm: (4x9216x192xf32, 4x9216xf32, 4x9216xf32) <- (4x9216x192xf32, 192xf32, 192xf32) + layer_norm_6, layer_norm_7, layer_norm_8 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_3, parameter_294, parameter_293, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_293, parameter_294 + + # pd_op.matmul: (4x9216x768xf32) <- (4x9216x192xf32, 192x768xf32) + matmul_3 = paddle._C_ops.matmul(layer_norm_6, parameter_292, False, False) + del parameter_292 + + # pd_op.add: (4x9216x768xf32) <- (4x9216x768xf32, 768xf32) + add_4 = paddle._C_ops.add(matmul_3, parameter_291) + del parameter_291 + + # pd_op.gelu: (4x9216x768xf32) <- (4x9216x768xf32) + gelu_0 = paddle._C_ops.gelu(add_4, False) + + # pd_op.matmul: (4x9216x192xf32) <- (4x9216x768xf32, 768x192xf32) + matmul_4 = paddle._C_ops.matmul(gelu_0, parameter_290, False, False) + del parameter_290 + + # pd_op.add: (4x9216x192xf32) <- (4x9216x192xf32, 192xf32) + add_5 = paddle._C_ops.add(matmul_4, parameter_289) + del parameter_289 + + # pd_op.add: (4x9216x192xf32) <- (4x9216x192xf32, 4x9216x192xf32) + add_6 = paddle._C_ops.add(add_3, add_5) + + # pd_op.layer_norm: (4x9216x192xf32, 4x9216xf32, 4x9216xf32) <- (4x9216x192xf32, 192xf32, 192xf32) + layer_norm_9, layer_norm_10, layer_norm_11 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_6, parameter_288, parameter_287, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_287, parameter_288 + + # pd_op.reshape: (4x96x96x192xf32) <- (4x9216x192xf32, 4xi64) + reshape_9 = paddle._C_ops.reshape(layer_norm_9, full_int_array_10) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_4 = [-6, -6] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_256 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_223 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_203 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_183 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_163 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_143 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_123 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_103 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_83 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_63 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_30 = full_int_array_4 + + # pd_op.roll: (4x96x96x192xf32) <- (4x96x96x192xf32, 2xi64) + roll_0 = paddle._C_ops.roll(reshape_9, full_int_array_4, [1, 2]) + + # pd_op.reshape: (4x8x12x8x12x192xf32) <- (4x96x96x192xf32, 6xi64) + reshape_215 = paddle._C_ops.reshape(roll_0, full_int_array_11) + del full_int_array_11 + + # pd_op.transpose: (4x8x8x12x12x192xf32) <- (4x8x12x8x12x192xf32) + transpose_7 = paddle._C_ops.transpose(reshape_215, [0, 1, 3, 2, 4, 5]) + del reshape_215 + + # pd_op.reshape: (256x12x12x192xf32) <- (4x8x8x12x12x192xf32, 4xi64) + reshape_10 = paddle._C_ops.reshape(transpose_7, full_int_array_12) + + # pd_op.reshape: (256x144x192xf32) <- (256x12x12x192xf32, 3xi64) + reshape_11 = paddle._C_ops.reshape(reshape_10, full_int_array_13) + del full_int_array_13 + + # pd_op.full: (1x96x96x1xf32) <- () + full_25 = paddle._C_ops.full( + [1, 96, 96, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_21 = [0, 0] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_234 = full_int_array_21 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_41 = full_int_array_21 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_12 = full_int_array_21 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_22 = [-12, -12] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_23 = [1, 1] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_268 = full_int_array_23 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_243 = full_int_array_23 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_50 = full_int_array_23 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_18 = full_int_array_23 + + # pd_op.set_value_: (1x96x96x1xf32) <- (1x96x96x1xf32, 2xi64, 2xi64, 2xi64) + set_value__12 = paddle._C_ops.set_value_( + full_25, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_25 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_24 = [0, -12] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_25 = [-12, -6] + + # pd_op.set_value_: (1x96x96x1xf32) <- (1x96x96x1xf32, 2xi64, 2xi64, 2xi64) + set_value__13 = paddle._C_ops.set_value_( + set_value__12, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__12 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_26 = [0, -6] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_27 = [-12, 2147483647] + + # pd_op.set_value_: (1x96x96x1xf32) <- (1x96x96x1xf32, 2xi64, 2xi64, 2xi64) + set_value__14 = paddle._C_ops.set_value_( + set_value__13, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__13 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_28 = [-12, 0] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_29 = [-6, -12] + + # pd_op.set_value_: (1x96x96x1xf32) <- (1x96x96x1xf32, 2xi64, 2xi64, 2xi64) + set_value__15 = paddle._C_ops.set_value_( + set_value__14, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__14 + + # pd_op.set_value_: (1x96x96x1xf32) <- (1x96x96x1xf32, 2xi64, 2xi64, 2xi64) + set_value__16 = paddle._C_ops.set_value_( + set_value__15, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__15 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_30 = [-6, 2147483647] + + # pd_op.set_value_: (1x96x96x1xf32) <- (1x96x96x1xf32, 2xi64, 2xi64, 2xi64) + set_value__17 = paddle._C_ops.set_value_( + set_value__16, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__16 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_31 = [-6, 0] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_32 = [2147483647, -12] + + # pd_op.set_value_: (1x96x96x1xf32) <- (1x96x96x1xf32, 2xi64, 2xi64, 2xi64) + set_value__18 = paddle._C_ops.set_value_( + set_value__17, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__17 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_33 = [2147483647, -6] + + # pd_op.set_value_: (1x96x96x1xf32) <- (1x96x96x1xf32, 2xi64, 2xi64, 2xi64) + set_value__19 = paddle._C_ops.set_value_( + set_value__18, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__18 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_34 = [2147483647, 2147483647] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_244 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_241 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_238 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_235 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_51 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_48 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_45 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_42 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_19 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_16 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_14 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_13 = full_int_array_34 + + # pd_op.set_value_: (1x96x96x1xf32) <- (1x96x96x1xf32, 2xi64, 2xi64, 2xi64) + set_value__0 = paddle._C_ops.set_value_( + set_value__19, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__19 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_35 = [1, 8, 12, 8, 12, 1] + + # pd_op.reshape: (1x8x12x8x12x1xf32) <- (1x96x96x1xf32, 6xi64) + reshape_216 = paddle._C_ops.reshape(set_value__0, full_int_array_35) + del full_int_array_35 + + # pd_op.transpose: (1x8x8x12x12x1xf32) <- (1x8x12x8x12x1xf32) + transpose_146 = paddle._C_ops.transpose(reshape_216, [0, 1, 3, 2, 4, 5]) + del reshape_216 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_36 = [-1, 12, 12, 1] + + # pd_op.reshape: (64x12x12x1xf32) <- (1x8x8x12x12x1xf32, 4xi64) + reshape_217 = paddle._C_ops.reshape(transpose_146, full_int_array_36) + del transpose_146 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_37 = [-1, 144] + + # pd_op.reshape: (64x144xf32) <- (64x12x12x1xf32, 2xi64) + reshape_218 = paddle._C_ops.reshape(reshape_217, full_int_array_37) + del reshape_217 + + # pd_op.unsqueeze: (64x1x144xf32) <- (64x144xf32, 1xi64) + unsqueeze_37 = paddle._C_ops.unsqueeze(reshape_218, full_int_array_1) + + # pd_op.unsqueeze: (64x144x1xf32) <- (64x144xf32, 1xi64) + unsqueeze_38 = paddle._C_ops.unsqueeze(reshape_218, full_int_array_2) + del reshape_218 + + # pd_op.subtract: (64x144x144xf32) <- (64x1x144xf32, 64x144x1xf32) + subtract_0 = paddle._C_ops.subtract(unsqueeze_37, unsqueeze_38) + del unsqueeze_37, unsqueeze_38 + + # pd_op.full: (xf32) <- () + full_26 = paddle._C_ops.full( + [], float("0"), paddle.float32, paddle.framework._current_expected_place() + ) + + # pd_op.not_equal: (64x144x144xb) <- (64x144x144xf32, xf32) + not_equal_0 = paddle._C_ops.not_equal(subtract_0, full_26) + + # pd_op.full: (64x144x144xf32) <- () + full_27 = paddle._C_ops.full( + [64, 144, 144], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (64x144x144xf32) <- (64x144x144xb, 64x144x144xf32, 64x144x144xf32) + where_0 = paddle._C_ops.where(not_equal_0, full_27, subtract_0) + del full_27, not_equal_0, subtract_0 + + # pd_op.equal: (64x144x144xb) <- (64x144x144xf32, xf32) + equal_0 = paddle._C_ops.equal(where_0, full_26) + + # pd_op.full: (64x144x144xf32) <- () + full_28 = paddle._C_ops.full( + [64, 144, 144], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (64x144x144xf32) <- (64x144x144xb, 64x144x144xf32, 64x144x144xf32) + where_1 = paddle._C_ops.where(equal_0, full_28, where_0) + del equal_0, full_28, where_0 + + # pd_op.matmul: (256x144x576xf32) <- (256x144x192xf32, 192x576xf32) + matmul_5 = paddle._C_ops.matmul(reshape_11, parameter_286, False, False) + del parameter_286 + + # pd_op.add: (256x144x576xf32) <- (256x144x576xf32, 576xf32) + add_7 = paddle._C_ops.add(matmul_5, parameter_285) + del parameter_285 + + # pd_op.reshape: (256x144x3x6x32xf32) <- (256x144x576xf32, 5xi64) + reshape_219 = paddle._C_ops.reshape(add_7, full_int_array_14) + del full_int_array_14 + + # pd_op.transpose: (3x256x6x144x32xf32) <- (256x144x3x6x32xf32) + transpose_8 = paddle._C_ops.transpose(reshape_219, [2, 0, 3, 1, 4]) + del reshape_219 + + # pd_op.slice: (256x6x144x32xf32) <- (3x256x6x144x32xf32, 1xi64, 1xi64) + slice_26 = paddle._C_ops.slice( + transpose_8, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (256x6x144x32xf32) <- (3x256x6x144x32xf32, 1xi64, 1xi64) + slice_27 = paddle._C_ops.slice( + transpose_8, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (256x6x144x32xf32) <- (3x256x6x144x32xf32, 1xi64, 1xi64) + slice_1 = paddle._C_ops.slice( + transpose_8, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (256x6x144x32xf32) <- (256x6x144x32xf32, 1xf32) + scale_1 = paddle._C_ops.scale(slice_26, full_0, float("0"), True) + del slice_26 + + # pd_op.transpose: (256x6x32x144xf32) <- (256x6x144x32xf32) + transpose_9 = paddle._C_ops.transpose(slice_27, [0, 1, 3, 2]) + del slice_27 + + # pd_op.matmul: (256x6x144x144xf32) <- (256x6x144x32xf32, 256x6x32x144xf32) + matmul_6 = paddle._C_ops.matmul(scale_1, transpose_9, False, False) + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_12 = paddle._C_ops.reshape(data_43, full_int_array_15) + del data_43 + + # pd_op.index_select: (20736x6xf32) <- (529x6xf32, 20736xi64) + index_select_1 = paddle._C_ops.index_select(data_46, reshape_12, 0) + del data_46 + + # pd_op.reshape: (144x144x6xf32) <- (20736x6xf32, 3xi64) + reshape_220 = paddle._C_ops.reshape(index_select_1, full_int_array_16) + + # pd_op.transpose: (6x144x144xf32) <- (144x144x6xf32) + transpose_10 = paddle._C_ops.transpose(reshape_220, [2, 0, 1]) + del reshape_220 + + # pd_op.unsqueeze: (1x6x144x144xf32) <- (6x144x144xf32, 1xi64) + unsqueeze_1 = paddle._C_ops.unsqueeze(transpose_10, full_int_array_0) + + # pd_op.add: (256x6x144x144xf32) <- (256x6x144x144xf32, 1x6x144x144xf32) + add_8 = paddle._C_ops.add(matmul_6, unsqueeze_1) + + # pd_op.full_int_array: (5xi64) <- () + full_int_array_38 = [4, 64, 6, 144, 144] + + # pd_op.reshape: (4x64x6x144x144xf32) <- (256x6x144x144xf32, 5xi64) + reshape_13 = paddle._C_ops.reshape(add_8, full_int_array_38) + del full_int_array_38 + + # pd_op.unsqueeze: (64x1x144x144xf32) <- (64x144x144xf32, 1xi64) + unsqueeze_39 = paddle._C_ops.unsqueeze(where_1, full_int_array_1) + del where_1 + + # pd_op.unsqueeze: (1x64x1x144x144xf32) <- (64x1x144x144xf32, 1xi64) + unsqueeze_2 = paddle._C_ops.unsqueeze(unsqueeze_39, full_int_array_0) + del unsqueeze_39 + + # pd_op.add: (4x64x6x144x144xf32) <- (4x64x6x144x144xf32, 1x64x1x144x144xf32) + add_9 = paddle._C_ops.add(reshape_13, unsqueeze_2) + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_39 = [256, 6, 144, 144] + + # pd_op.reshape: (256x6x144x144xf32) <- (4x64x6x144x144xf32, 4xi64) + reshape_221 = paddle._C_ops.reshape(add_9, full_int_array_39) + del full_int_array_39 + + # pd_op.softmax: (256x6x144x144xf32) <- (256x6x144x144xf32) + softmax_1 = paddle._C_ops.softmax(reshape_221, -1) + del reshape_221 + + # pd_op.matmul: (256x6x144x32xf32) <- (256x6x144x144xf32, 256x6x144x32xf32) + matmul_125 = paddle._C_ops.matmul(softmax_1, slice_1, False, False) + + # pd_op.transpose: (256x144x6x32xf32) <- (256x6x144x32xf32) + transpose_11 = paddle._C_ops.transpose(matmul_125, [0, 2, 1, 3]) + del matmul_125 + + # pd_op.reshape: (256x144x192xf32) <- (256x144x6x32xf32, 3xi64) + reshape_14 = paddle._C_ops.reshape(transpose_11, full_int_array_17) + del full_int_array_17 + + # pd_op.matmul: (256x144x192xf32) <- (256x144x192xf32, 192x192xf32) + matmul_7 = paddle._C_ops.matmul(reshape_14, parameter_284, False, False) + del parameter_284 + + # pd_op.add: (256x144x192xf32) <- (256x144x192xf32, 192xf32) + add_10 = paddle._C_ops.add(matmul_7, parameter_283) + del parameter_283 + + # pd_op.reshape: (256x12x12x192xf32) <- (256x144x192xf32, 4xi64) + reshape_15 = paddle._C_ops.reshape(add_10, full_int_array_12) + del full_int_array_12 + + # pd_op.reshape: (4x8x8x12x12x192xf32) <- (256x12x12x192xf32, 6xi64) + reshape_222 = paddle._C_ops.reshape(reshape_15, full_int_array_18) + del full_int_array_18 + + # pd_op.transpose: (4x8x12x8x12x192xf32) <- (4x8x8x12x12x192xf32) + transpose_12 = paddle._C_ops.transpose(reshape_222, [0, 1, 3, 2, 4, 5]) + del reshape_222 + + # pd_op.reshape: (4x96x96x192xf32) <- (4x8x12x8x12x192xf32, 4xi64) + reshape_16 = paddle._C_ops.reshape(transpose_12, full_int_array_19) + del full_int_array_19 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_5 = [6, 6] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_265 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_232 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_212 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_192 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_172 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_152 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_132 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_112 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_92 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_72 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_39 = full_int_array_5 + + # pd_op.roll: (4x96x96x192xf32) <- (4x96x96x192xf32, 2xi64) + roll_1 = paddle._C_ops.roll(reshape_16, full_int_array_5, [1, 2]) + + # pd_op.reshape: (4x9216x192xf32) <- (4x96x96x192xf32, 3xi64) + reshape_17 = paddle._C_ops.reshape(roll_1, full_int_array_20) + del full_int_array_20 + + # pd_op.full: (xf32) <- () + full_1 = paddle._C_ops.full( + [], + float("0.995652"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_11 = full_1 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_40 = [4, 1, 1] + + # pd_op.full: (1xf32) <- () + full_29 = paddle._C_ops.full( + [1], float("0"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.full: (1xf32) <- () + full_30 = paddle._C_ops.full( + [1], float("1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_0 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_171 = paddle._C_ops.add(full_1, uniform_0) + del uniform_0 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_0 = paddle._C_ops.floor(add_171) + del add_171 + + # pd_op.divide: (4x9216x192xf32) <- (4x9216x192xf32, xf32) + divide_0 = paddle._C_ops.divide(reshape_17, full_1) + + # pd_op.multiply: (4x9216x192xf32) <- (4x9216x192xf32, 4x1x1xf32) + multiply_0 = paddle._C_ops.multiply(divide_0, floor_0) + + # pd_op.add: (4x9216x192xf32) <- (4x9216x192xf32, 4x9216x192xf32) + add_11 = paddle._C_ops.add(add_6, multiply_0) + + # pd_op.layer_norm: (4x9216x192xf32, 4x9216xf32, 4x9216xf32) <- (4x9216x192xf32, 192xf32, 192xf32) + layer_norm_12, layer_norm_13, layer_norm_14 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_11, parameter_282, parameter_281, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_281, parameter_282 + + # pd_op.matmul: (4x9216x768xf32) <- (4x9216x192xf32, 192x768xf32) + matmul_8 = paddle._C_ops.matmul(layer_norm_12, parameter_280, False, False) + del parameter_280 + + # pd_op.add: (4x9216x768xf32) <- (4x9216x768xf32, 768xf32) + add_12 = paddle._C_ops.add(matmul_8, parameter_279) + del parameter_279 + + # pd_op.gelu: (4x9216x768xf32) <- (4x9216x768xf32) + gelu_1 = paddle._C_ops.gelu(add_12, False) + + # pd_op.matmul: (4x9216x192xf32) <- (4x9216x768xf32, 768x192xf32) + matmul_9 = paddle._C_ops.matmul(gelu_1, parameter_278, False, False) + del parameter_278 + + # pd_op.add: (4x9216x192xf32) <- (4x9216x192xf32, 192xf32) + add_13 = paddle._C_ops.add(matmul_9, parameter_277) + del parameter_277 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_1 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_172 = paddle._C_ops.add(full_1, uniform_1) + del uniform_1 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_1 = paddle._C_ops.floor(add_172) + del add_172 + + # pd_op.divide: (4x9216x192xf32) <- (4x9216x192xf32, xf32) + divide_1 = paddle._C_ops.divide(add_13, full_1) + + # pd_op.multiply: (4x9216x192xf32) <- (4x9216x192xf32, 4x1x1xf32) + multiply_1 = paddle._C_ops.multiply(divide_1, floor_1) + + # pd_op.add: (4x9216x192xf32) <- (4x9216x192xf32, 4x9216x192xf32) + add_14 = paddle._C_ops.add(add_11, multiply_1) + + # pd_op.reshape: (4x96x96x192xf32) <- (4x9216x192xf32, 4xi64) + reshape_18 = paddle._C_ops.reshape(add_14, full_int_array_10) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_6 = [2, 2] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_245 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_242 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_239 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_236 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_52 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_49 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_46 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_43 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_20 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_17 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_15 = full_int_array_6 + + # pd_op.strided_slice: (4x48x48x192xf32) <- (4x96x96x192xf32, 2xi64, 2xi64, 2xi64) + strided_slice_0 = paddle._C_ops.strided_slice( + reshape_18, [1, 2], full_int_array_21, full_int_array_34, full_int_array_6 + ) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_7 = [1, 0] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_237 = full_int_array_7 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_44 = full_int_array_7 + + # pd_op.strided_slice: (4x48x48x192xf32) <- (4x96x96x192xf32, 2xi64, 2xi64, 2xi64) + strided_slice_1 = paddle._C_ops.strided_slice( + reshape_18, [1, 2], full_int_array_7, full_int_array_34, full_int_array_6 + ) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_8 = [0, 1] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_240 = full_int_array_8 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_47 = full_int_array_8 + + # pd_op.strided_slice: (4x48x48x192xf32) <- (4x96x96x192xf32, 2xi64, 2xi64, 2xi64) + strided_slice_2 = paddle._C_ops.strided_slice( + reshape_18, [1, 2], full_int_array_8, full_int_array_34, full_int_array_6 + ) + + # pd_op.strided_slice: (4x48x48x192xf32) <- (4x96x96x192xf32, 2xi64, 2xi64, 2xi64) + strided_slice_3 = paddle._C_ops.strided_slice( + reshape_18, [1, 2], full_int_array_23, full_int_array_34, full_int_array_6 + ) + + # pd_op.reshape: (4x96x96x192xf32) <- (4x96x96x192xf32, 4xi64) + reshape_223 = paddle._C_ops.reshape(reshape_18, full_int_array_10) + del full_int_array_10 + + # pd_op.full: (1xi32) <- () + full_2 = paddle._C_ops.full( + [1], float("-1"), paddle.int32, paddle.core.CPUPlace() + ) + + # pd_op.assign: (1xi32) <- (1xi32) + assign_246 = full_2 + + # pd_op.assign: (1xi32) <- (1xi32) + assign_53 = full_2 + + # builtin.combine: ([4x48x48x192xf32, 4x48x48x192xf32, 4x48x48x192xf32, 4x48x48x192xf32]) <- (4x48x48x192xf32, 4x48x48x192xf32, 4x48x48x192xf32, 4x48x48x192xf32) + combine_0 = [strided_slice_0, strided_slice_1, strided_slice_2, strided_slice_3] + + # pd_op.concat: (4x48x48x768xf32) <- ([4x48x48x192xf32, 4x48x48x192xf32, 4x48x48x192xf32, 4x48x48x192xf32], 1xi32) + concat_0 = paddle._C_ops.concat(combine_0, full_2) + del combine_0 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_41 = [4, -1, 768] + + # pd_op.reshape: (4x2304x768xf32) <- (4x48x48x768xf32, 3xi64) + reshape_19 = paddle._C_ops.reshape(concat_0, full_int_array_41) + del full_int_array_41 + + # pd_op.layer_norm: (4x2304x768xf32, 4x2304xf32, 4x2304xf32) <- (4x2304x768xf32, 768xf32, 768xf32) + layer_norm_15, layer_norm_16, layer_norm_17 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + reshape_19, parameter_276, parameter_275, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_275, parameter_276 + + # pd_op.matmul: (4x2304x384xf32) <- (4x2304x768xf32, 768x384xf32) + matmul_10 = paddle._C_ops.matmul(layer_norm_15, parameter_274, False, False) + del parameter_274 + + # pd_op.layer_norm: (4x2304x384xf32, 4x2304xf32, 4x2304xf32) <- (4x2304x384xf32, 384xf32, 384xf32) + layer_norm_18, layer_norm_19, layer_norm_20 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + matmul_10, parameter_273, parameter_272, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_272, parameter_273 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_42 = [4, 48, 48, 384] + + # pd_op.reshape: (4x48x48x384xf32) <- (4x2304x384xf32, 4xi64) + reshape_20 = paddle._C_ops.reshape(layer_norm_18, full_int_array_42) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_43 = [4, 4, 12, 4, 12, 384] + + # pd_op.reshape: (4x4x12x4x12x384xf32) <- (4x48x48x384xf32, 6xi64) + reshape_224 = paddle._C_ops.reshape(reshape_20, full_int_array_43) + + # pd_op.transpose: (4x4x4x12x12x384xf32) <- (4x4x12x4x12x384xf32) + transpose_13 = paddle._C_ops.transpose(reshape_224, [0, 1, 3, 2, 4, 5]) + del reshape_224 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_44 = [-1, 12, 12, 384] + + # pd_op.reshape: (64x12x12x384xf32) <- (4x4x4x12x12x384xf32, 4xi64) + reshape_21 = paddle._C_ops.reshape(transpose_13, full_int_array_44) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_45 = [-1, 144, 384] + + # pd_op.reshape: (64x144x384xf32) <- (64x12x12x384xf32, 3xi64) + reshape_22 = paddle._C_ops.reshape(reshape_21, full_int_array_45) + + # pd_op.matmul: (64x144x1152xf32) <- (64x144x384xf32, 384x1152xf32) + matmul_11 = paddle._C_ops.matmul(reshape_22, parameter_271, False, False) + del parameter_271 + + # pd_op.add: (64x144x1152xf32) <- (64x144x1152xf32, 1152xf32) + add_15 = paddle._C_ops.add(matmul_11, parameter_270) + del parameter_270 + + # pd_op.full_int_array: (5xi64) <- () + full_int_array_46 = [64, 144, 3, 12, 32] + + # pd_op.reshape: (64x144x3x12x32xf32) <- (64x144x1152xf32, 5xi64) + reshape_225 = paddle._C_ops.reshape(add_15, full_int_array_46) + + # pd_op.transpose: (3x64x12x144x32xf32) <- (64x144x3x12x32xf32) + transpose_14 = paddle._C_ops.transpose(reshape_225, [2, 0, 3, 1, 4]) + del reshape_225 + + # pd_op.slice: (64x12x144x32xf32) <- (3x64x12x144x32xf32, 1xi64, 1xi64) + slice_28 = paddle._C_ops.slice( + transpose_14, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (64x12x144x32xf32) <- (3x64x12x144x32xf32, 1xi64, 1xi64) + slice_29 = paddle._C_ops.slice( + transpose_14, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (64x12x144x32xf32) <- (3x64x12x144x32xf32, 1xi64, 1xi64) + slice_2 = paddle._C_ops.slice( + transpose_14, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (64x12x144x32xf32) <- (64x12x144x32xf32, 1xf32) + scale_2 = paddle._C_ops.scale(slice_28, full_0, float("0"), True) + del slice_28 + + # pd_op.transpose: (64x12x32x144xf32) <- (64x12x144x32xf32) + transpose_15 = paddle._C_ops.transpose(slice_29, [0, 1, 3, 2]) + del slice_29 + + # pd_op.matmul: (64x12x144x144xf32) <- (64x12x144x32xf32, 64x12x32x144xf32) + matmul_12 = paddle._C_ops.matmul(scale_2, transpose_15, False, False) + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_23 = paddle._C_ops.reshape(data_17, full_int_array_15) + del data_17 + + # pd_op.index_select: (20736x12xf32) <- (529x12xf32, 20736xi64) + index_select_2 = paddle._C_ops.index_select(data_20, reshape_23, 0) + del data_20 + + # pd_op.reshape: (144x144x12xf32) <- (20736x12xf32, 3xi64) + reshape_226 = paddle._C_ops.reshape(index_select_2, full_int_array_16) + + # pd_op.transpose: (12x144x144xf32) <- (144x144x12xf32) + transpose_16 = paddle._C_ops.transpose(reshape_226, [2, 0, 1]) + del reshape_226 + + # pd_op.unsqueeze: (1x12x144x144xf32) <- (12x144x144xf32, 1xi64) + unsqueeze_3 = paddle._C_ops.unsqueeze(transpose_16, full_int_array_0) + + # pd_op.add: (64x12x144x144xf32) <- (64x12x144x144xf32, 1x12x144x144xf32) + add_173 = paddle._C_ops.add(matmul_12, unsqueeze_3) + + # pd_op.softmax: (64x12x144x144xf32) <- (64x12x144x144xf32) + softmax_2 = paddle._C_ops.softmax(add_173, -1) + del add_173 + + # pd_op.matmul: (64x12x144x32xf32) <- (64x12x144x144xf32, 64x12x144x32xf32) + matmul_126 = paddle._C_ops.matmul(softmax_2, slice_2, False, False) + + # pd_op.transpose: (64x144x12x32xf32) <- (64x12x144x32xf32) + transpose_17 = paddle._C_ops.transpose(matmul_126, [0, 2, 1, 3]) + del matmul_126 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_47 = [64, 144, 384] + + # pd_op.reshape: (64x144x384xf32) <- (64x144x12x32xf32, 3xi64) + reshape_24 = paddle._C_ops.reshape(transpose_17, full_int_array_47) + + # pd_op.matmul: (64x144x384xf32) <- (64x144x384xf32, 384x384xf32) + matmul_13 = paddle._C_ops.matmul(reshape_24, parameter_269, False, False) + del parameter_269 + + # pd_op.add: (64x144x384xf32) <- (64x144x384xf32, 384xf32) + add_16 = paddle._C_ops.add(matmul_13, parameter_268) + del parameter_268 + + # pd_op.reshape: (64x12x12x384xf32) <- (64x144x384xf32, 4xi64) + reshape_25 = paddle._C_ops.reshape(add_16, full_int_array_44) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_48 = [-1, 4, 4, 12, 12, 384] + + # pd_op.reshape: (4x4x4x12x12x384xf32) <- (64x12x12x384xf32, 6xi64) + reshape_227 = paddle._C_ops.reshape(reshape_25, full_int_array_48) + + # pd_op.transpose: (4x4x12x4x12x384xf32) <- (4x4x4x12x12x384xf32) + transpose_18 = paddle._C_ops.transpose(reshape_227, [0, 1, 3, 2, 4, 5]) + del reshape_227 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_49 = [-1, 48, 48, 384] + + # pd_op.reshape: (4x48x48x384xf32) <- (4x4x12x4x12x384xf32, 4xi64) + reshape_26 = paddle._C_ops.reshape(transpose_18, full_int_array_49) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_50 = [4, 2304, 384] + + # pd_op.reshape: (4x2304x384xf32) <- (4x48x48x384xf32, 3xi64) + reshape_27 = paddle._C_ops.reshape(reshape_26, full_int_array_50) + + # pd_op.full: (xf32) <- () + full_3 = paddle._C_ops.full( + [], + float("0.991304"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_29 = full_3 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_2 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_174 = paddle._C_ops.add(full_3, uniform_2) + del uniform_2 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_2 = paddle._C_ops.floor(add_174) + del add_174 + + # pd_op.divide: (4x2304x384xf32) <- (4x2304x384xf32, xf32) + divide_2 = paddle._C_ops.divide(reshape_27, full_3) + + # pd_op.multiply: (4x2304x384xf32) <- (4x2304x384xf32, 4x1x1xf32) + multiply_2 = paddle._C_ops.multiply(divide_2, floor_2) + + # pd_op.add: (4x2304x384xf32) <- (4x2304x384xf32, 4x2304x384xf32) + add_17 = paddle._C_ops.add(matmul_10, multiply_2) + + # pd_op.layer_norm: (4x2304x384xf32, 4x2304xf32, 4x2304xf32) <- (4x2304x384xf32, 384xf32, 384xf32) + layer_norm_21, layer_norm_22, layer_norm_23 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_17, parameter_267, parameter_266, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_266, parameter_267 + + # pd_op.matmul: (4x2304x1536xf32) <- (4x2304x384xf32, 384x1536xf32) + matmul_14 = paddle._C_ops.matmul(layer_norm_21, parameter_265, False, False) + del parameter_265 + + # pd_op.add: (4x2304x1536xf32) <- (4x2304x1536xf32, 1536xf32) + add_18 = paddle._C_ops.add(matmul_14, parameter_264) + del parameter_264 + + # pd_op.gelu: (4x2304x1536xf32) <- (4x2304x1536xf32) + gelu_2 = paddle._C_ops.gelu(add_18, False) + + # pd_op.matmul: (4x2304x384xf32) <- (4x2304x1536xf32, 1536x384xf32) + matmul_15 = paddle._C_ops.matmul(gelu_2, parameter_263, False, False) + del parameter_263 + + # pd_op.add: (4x2304x384xf32) <- (4x2304x384xf32, 384xf32) + add_19 = paddle._C_ops.add(matmul_15, parameter_262) + del parameter_262 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_3 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_175 = paddle._C_ops.add(full_3, uniform_3) + del uniform_3 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_3 = paddle._C_ops.floor(add_175) + del add_175 + + # pd_op.divide: (4x2304x384xf32) <- (4x2304x384xf32, xf32) + divide_3 = paddle._C_ops.divide(add_19, full_3) + + # pd_op.multiply: (4x2304x384xf32) <- (4x2304x384xf32, 4x1x1xf32) + multiply_3 = paddle._C_ops.multiply(divide_3, floor_3) + + # pd_op.add: (4x2304x384xf32) <- (4x2304x384xf32, 4x2304x384xf32) + add_20 = paddle._C_ops.add(add_17, multiply_3) + + # pd_op.layer_norm: (4x2304x384xf32, 4x2304xf32, 4x2304xf32) <- (4x2304x384xf32, 384xf32, 384xf32) + layer_norm_24, layer_norm_25, layer_norm_26 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_20, parameter_261, parameter_260, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_260, parameter_261 + + # pd_op.reshape: (4x48x48x384xf32) <- (4x2304x384xf32, 4xi64) + reshape_28 = paddle._C_ops.reshape(layer_norm_24, full_int_array_42) + + # pd_op.roll: (4x48x48x384xf32) <- (4x48x48x384xf32, 2xi64) + roll_2 = paddle._C_ops.roll(reshape_28, full_int_array_4, [1, 2]) + + # pd_op.reshape: (4x4x12x4x12x384xf32) <- (4x48x48x384xf32, 6xi64) + reshape_228 = paddle._C_ops.reshape(roll_2, full_int_array_43) + del full_int_array_43 + + # pd_op.transpose: (4x4x4x12x12x384xf32) <- (4x4x12x4x12x384xf32) + transpose_19 = paddle._C_ops.transpose(reshape_228, [0, 1, 3, 2, 4, 5]) + del reshape_228 + + # pd_op.reshape: (64x12x12x384xf32) <- (4x4x4x12x12x384xf32, 4xi64) + reshape_29 = paddle._C_ops.reshape(transpose_19, full_int_array_44) + + # pd_op.reshape: (64x144x384xf32) <- (64x12x12x384xf32, 3xi64) + reshape_30 = paddle._C_ops.reshape(reshape_29, full_int_array_45) + del full_int_array_45 + + # pd_op.full: (1x48x48x1xf32) <- () + full_31 = paddle._C_ops.full( + [1, 48, 48, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x48x48x1xf32) <- (1x48x48x1xf32, 2xi64, 2xi64, 2xi64) + set_value__20 = paddle._C_ops.set_value_( + full_31, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_31 + + # pd_op.set_value_: (1x48x48x1xf32) <- (1x48x48x1xf32, 2xi64, 2xi64, 2xi64) + set_value__21 = paddle._C_ops.set_value_( + set_value__20, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__20 + + # pd_op.set_value_: (1x48x48x1xf32) <- (1x48x48x1xf32, 2xi64, 2xi64, 2xi64) + set_value__22 = paddle._C_ops.set_value_( + set_value__21, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__21 + + # pd_op.set_value_: (1x48x48x1xf32) <- (1x48x48x1xf32, 2xi64, 2xi64, 2xi64) + set_value__23 = paddle._C_ops.set_value_( + set_value__22, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__22 + + # pd_op.set_value_: (1x48x48x1xf32) <- (1x48x48x1xf32, 2xi64, 2xi64, 2xi64) + set_value__24 = paddle._C_ops.set_value_( + set_value__23, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__23 + + # pd_op.set_value_: (1x48x48x1xf32) <- (1x48x48x1xf32, 2xi64, 2xi64, 2xi64) + set_value__25 = paddle._C_ops.set_value_( + set_value__24, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__24 + + # pd_op.set_value_: (1x48x48x1xf32) <- (1x48x48x1xf32, 2xi64, 2xi64, 2xi64) + set_value__26 = paddle._C_ops.set_value_( + set_value__25, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__25 + + # pd_op.set_value_: (1x48x48x1xf32) <- (1x48x48x1xf32, 2xi64, 2xi64, 2xi64) + set_value__27 = paddle._C_ops.set_value_( + set_value__26, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__26 + + # pd_op.set_value_: (1x48x48x1xf32) <- (1x48x48x1xf32, 2xi64, 2xi64, 2xi64) + set_value__1 = paddle._C_ops.set_value_( + set_value__27, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__27 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_51 = [1, 4, 12, 4, 12, 1] + + # pd_op.reshape: (1x4x12x4x12x1xf32) <- (1x48x48x1xf32, 6xi64) + reshape_229 = paddle._C_ops.reshape(set_value__1, full_int_array_51) + del full_int_array_51 + + # pd_op.transpose: (1x4x4x12x12x1xf32) <- (1x4x12x4x12x1xf32) + transpose_147 = paddle._C_ops.transpose(reshape_229, [0, 1, 3, 2, 4, 5]) + del reshape_229 + + # pd_op.reshape: (16x12x12x1xf32) <- (1x4x4x12x12x1xf32, 4xi64) + reshape_230 = paddle._C_ops.reshape(transpose_147, full_int_array_36) + del transpose_147 + + # pd_op.reshape: (16x144xf32) <- (16x12x12x1xf32, 2xi64) + reshape_231 = paddle._C_ops.reshape(reshape_230, full_int_array_37) + del reshape_230 + + # pd_op.unsqueeze: (16x1x144xf32) <- (16x144xf32, 1xi64) + unsqueeze_40 = paddle._C_ops.unsqueeze(reshape_231, full_int_array_1) + + # pd_op.unsqueeze: (16x144x1xf32) <- (16x144xf32, 1xi64) + unsqueeze_41 = paddle._C_ops.unsqueeze(reshape_231, full_int_array_2) + del reshape_231 + + # pd_op.subtract: (16x144x144xf32) <- (16x1x144xf32, 16x144x1xf32) + subtract_1 = paddle._C_ops.subtract(unsqueeze_40, unsqueeze_41) + del unsqueeze_40, unsqueeze_41 + + # pd_op.not_equal: (16x144x144xb) <- (16x144x144xf32, xf32) + not_equal_1 = paddle._C_ops.not_equal(subtract_1, full_26) + + # pd_op.full: (16x144x144xf32) <- () + full_32 = paddle._C_ops.full( + [16, 144, 144], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (16x144x144xf32) <- (16x144x144xb, 16x144x144xf32, 16x144x144xf32) + where_2 = paddle._C_ops.where(not_equal_1, full_32, subtract_1) + del full_32, not_equal_1, subtract_1 + + # pd_op.equal: (16x144x144xb) <- (16x144x144xf32, xf32) + equal_1 = paddle._C_ops.equal(where_2, full_26) + + # pd_op.full: (16x144x144xf32) <- () + full_33 = paddle._C_ops.full( + [16, 144, 144], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (16x144x144xf32) <- (16x144x144xb, 16x144x144xf32, 16x144x144xf32) + where_3 = paddle._C_ops.where(equal_1, full_33, where_2) + del equal_1, full_33, where_2 + + # pd_op.matmul: (64x144x1152xf32) <- (64x144x384xf32, 384x1152xf32) + matmul_16 = paddle._C_ops.matmul(reshape_30, parameter_259, False, False) + del parameter_259 + + # pd_op.add: (64x144x1152xf32) <- (64x144x1152xf32, 1152xf32) + add_21 = paddle._C_ops.add(matmul_16, parameter_258) + del parameter_258 + + # pd_op.reshape: (64x144x3x12x32xf32) <- (64x144x1152xf32, 5xi64) + reshape_232 = paddle._C_ops.reshape(add_21, full_int_array_46) + del full_int_array_46 + + # pd_op.transpose: (3x64x12x144x32xf32) <- (64x144x3x12x32xf32) + transpose_20 = paddle._C_ops.transpose(reshape_232, [2, 0, 3, 1, 4]) + del reshape_232 + + # pd_op.slice: (64x12x144x32xf32) <- (3x64x12x144x32xf32, 1xi64, 1xi64) + slice_30 = paddle._C_ops.slice( + transpose_20, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (64x12x144x32xf32) <- (3x64x12x144x32xf32, 1xi64, 1xi64) + slice_31 = paddle._C_ops.slice( + transpose_20, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (64x12x144x32xf32) <- (3x64x12x144x32xf32, 1xi64, 1xi64) + slice_3 = paddle._C_ops.slice( + transpose_20, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (64x12x144x32xf32) <- (64x12x144x32xf32, 1xf32) + scale_3 = paddle._C_ops.scale(slice_30, full_0, float("0"), True) + del slice_30 + + # pd_op.transpose: (64x12x32x144xf32) <- (64x12x144x32xf32) + transpose_21 = paddle._C_ops.transpose(slice_31, [0, 1, 3, 2]) + del slice_31 + + # pd_op.matmul: (64x12x144x144xf32) <- (64x12x144x32xf32, 64x12x32x144xf32) + matmul_17 = paddle._C_ops.matmul(scale_3, transpose_21, False, False) + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_31 = paddle._C_ops.reshape(data_25, full_int_array_15) + del data_25 + + # pd_op.index_select: (20736x12xf32) <- (529x12xf32, 20736xi64) + index_select_3 = paddle._C_ops.index_select(data_26, reshape_31, 0) + del data_26 + + # pd_op.reshape: (144x144x12xf32) <- (20736x12xf32, 3xi64) + reshape_233 = paddle._C_ops.reshape(index_select_3, full_int_array_16) + + # pd_op.transpose: (12x144x144xf32) <- (144x144x12xf32) + transpose_22 = paddle._C_ops.transpose(reshape_233, [2, 0, 1]) + del reshape_233 + + # pd_op.unsqueeze: (1x12x144x144xf32) <- (12x144x144xf32, 1xi64) + unsqueeze_4 = paddle._C_ops.unsqueeze(transpose_22, full_int_array_0) + + # pd_op.add: (64x12x144x144xf32) <- (64x12x144x144xf32, 1x12x144x144xf32) + add_22 = paddle._C_ops.add(matmul_17, unsqueeze_4) + + # pd_op.full_int_array: (5xi64) <- () + full_int_array_52 = [4, 16, 12, 144, 144] + + # pd_op.reshape: (4x16x12x144x144xf32) <- (64x12x144x144xf32, 5xi64) + reshape_32 = paddle._C_ops.reshape(add_22, full_int_array_52) + del full_int_array_52 + + # pd_op.unsqueeze: (16x1x144x144xf32) <- (16x144x144xf32, 1xi64) + unsqueeze_42 = paddle._C_ops.unsqueeze(where_3, full_int_array_1) + del where_3 + + # pd_op.unsqueeze: (1x16x1x144x144xf32) <- (16x1x144x144xf32, 1xi64) + unsqueeze_5 = paddle._C_ops.unsqueeze(unsqueeze_42, full_int_array_0) + del unsqueeze_42 + + # pd_op.add: (4x16x12x144x144xf32) <- (4x16x12x144x144xf32, 1x16x1x144x144xf32) + add_23 = paddle._C_ops.add(reshape_32, unsqueeze_5) + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_53 = [64, 12, 144, 144] + + # pd_op.reshape: (64x12x144x144xf32) <- (4x16x12x144x144xf32, 4xi64) + reshape_234 = paddle._C_ops.reshape(add_23, full_int_array_53) + del full_int_array_53 + + # pd_op.softmax: (64x12x144x144xf32) <- (64x12x144x144xf32) + softmax_3 = paddle._C_ops.softmax(reshape_234, -1) + del reshape_234 + + # pd_op.matmul: (64x12x144x32xf32) <- (64x12x144x144xf32, 64x12x144x32xf32) + matmul_127 = paddle._C_ops.matmul(softmax_3, slice_3, False, False) + + # pd_op.transpose: (64x144x12x32xf32) <- (64x12x144x32xf32) + transpose_23 = paddle._C_ops.transpose(matmul_127, [0, 2, 1, 3]) + del matmul_127 + + # pd_op.reshape: (64x144x384xf32) <- (64x144x12x32xf32, 3xi64) + reshape_33 = paddle._C_ops.reshape(transpose_23, full_int_array_47) + del full_int_array_47 + + # pd_op.matmul: (64x144x384xf32) <- (64x144x384xf32, 384x384xf32) + matmul_18 = paddle._C_ops.matmul(reshape_33, parameter_257, False, False) + del parameter_257 + + # pd_op.add: (64x144x384xf32) <- (64x144x384xf32, 384xf32) + add_24 = paddle._C_ops.add(matmul_18, parameter_256) + del parameter_256 + + # pd_op.reshape: (64x12x12x384xf32) <- (64x144x384xf32, 4xi64) + reshape_34 = paddle._C_ops.reshape(add_24, full_int_array_44) + del full_int_array_44 + + # pd_op.reshape: (4x4x4x12x12x384xf32) <- (64x12x12x384xf32, 6xi64) + reshape_235 = paddle._C_ops.reshape(reshape_34, full_int_array_48) + del full_int_array_48 + + # pd_op.transpose: (4x4x12x4x12x384xf32) <- (4x4x4x12x12x384xf32) + transpose_24 = paddle._C_ops.transpose(reshape_235, [0, 1, 3, 2, 4, 5]) + del reshape_235 + + # pd_op.reshape: (4x48x48x384xf32) <- (4x4x12x4x12x384xf32, 4xi64) + reshape_35 = paddle._C_ops.reshape(transpose_24, full_int_array_49) + del full_int_array_49 + + # pd_op.roll: (4x48x48x384xf32) <- (4x48x48x384xf32, 2xi64) + roll_3 = paddle._C_ops.roll(reshape_35, full_int_array_5, [1, 2]) + + # pd_op.reshape: (4x2304x384xf32) <- (4x48x48x384xf32, 3xi64) + reshape_36 = paddle._C_ops.reshape(roll_3, full_int_array_50) + del full_int_array_50 + + # pd_op.full: (xf32) <- () + full_4 = paddle._C_ops.full( + [], + float("0.986957"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_40 = full_4 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_4 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_176 = paddle._C_ops.add(full_4, uniform_4) + del uniform_4 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_4 = paddle._C_ops.floor(add_176) + del add_176 + + # pd_op.divide: (4x2304x384xf32) <- (4x2304x384xf32, xf32) + divide_4 = paddle._C_ops.divide(reshape_36, full_4) + + # pd_op.multiply: (4x2304x384xf32) <- (4x2304x384xf32, 4x1x1xf32) + multiply_4 = paddle._C_ops.multiply(divide_4, floor_4) + + # pd_op.add: (4x2304x384xf32) <- (4x2304x384xf32, 4x2304x384xf32) + add_25 = paddle._C_ops.add(add_20, multiply_4) + + # pd_op.layer_norm: (4x2304x384xf32, 4x2304xf32, 4x2304xf32) <- (4x2304x384xf32, 384xf32, 384xf32) + layer_norm_27, layer_norm_28, layer_norm_29 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_25, parameter_255, parameter_254, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_254, parameter_255 + + # pd_op.matmul: (4x2304x1536xf32) <- (4x2304x384xf32, 384x1536xf32) + matmul_19 = paddle._C_ops.matmul(layer_norm_27, parameter_253, False, False) + del parameter_253 + + # pd_op.add: (4x2304x1536xf32) <- (4x2304x1536xf32, 1536xf32) + add_26 = paddle._C_ops.add(matmul_19, parameter_252) + del parameter_252 + + # pd_op.gelu: (4x2304x1536xf32) <- (4x2304x1536xf32) + gelu_3 = paddle._C_ops.gelu(add_26, False) + + # pd_op.matmul: (4x2304x384xf32) <- (4x2304x1536xf32, 1536x384xf32) + matmul_20 = paddle._C_ops.matmul(gelu_3, parameter_251, False, False) + del parameter_251 + + # pd_op.add: (4x2304x384xf32) <- (4x2304x384xf32, 384xf32) + add_27 = paddle._C_ops.add(matmul_20, parameter_250) + del parameter_250 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_5 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_177 = paddle._C_ops.add(full_4, uniform_5) + del uniform_5 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_5 = paddle._C_ops.floor(add_177) + del add_177 + + # pd_op.divide: (4x2304x384xf32) <- (4x2304x384xf32, xf32) + divide_5 = paddle._C_ops.divide(add_27, full_4) + + # pd_op.multiply: (4x2304x384xf32) <- (4x2304x384xf32, 4x1x1xf32) + multiply_5 = paddle._C_ops.multiply(divide_5, floor_5) + + # pd_op.add: (4x2304x384xf32) <- (4x2304x384xf32, 4x2304x384xf32) + add_28 = paddle._C_ops.add(add_25, multiply_5) + + # pd_op.reshape: (4x48x48x384xf32) <- (4x2304x384xf32, 4xi64) + reshape_37 = paddle._C_ops.reshape(add_28, full_int_array_42) + + # pd_op.strided_slice: (4x24x24x384xf32) <- (4x48x48x384xf32, 2xi64, 2xi64, 2xi64) + strided_slice_4 = paddle._C_ops.strided_slice( + reshape_37, [1, 2], full_int_array_21, full_int_array_34, full_int_array_6 + ) + + # pd_op.strided_slice: (4x24x24x384xf32) <- (4x48x48x384xf32, 2xi64, 2xi64, 2xi64) + strided_slice_5 = paddle._C_ops.strided_slice( + reshape_37, [1, 2], full_int_array_7, full_int_array_34, full_int_array_6 + ) + + # pd_op.strided_slice: (4x24x24x384xf32) <- (4x48x48x384xf32, 2xi64, 2xi64, 2xi64) + strided_slice_6 = paddle._C_ops.strided_slice( + reshape_37, [1, 2], full_int_array_8, full_int_array_34, full_int_array_6 + ) + + # pd_op.strided_slice: (4x24x24x384xf32) <- (4x48x48x384xf32, 2xi64, 2xi64, 2xi64) + strided_slice_7 = paddle._C_ops.strided_slice( + reshape_37, [1, 2], full_int_array_23, full_int_array_34, full_int_array_6 + ) + + # pd_op.reshape: (4x48x48x384xf32) <- (4x48x48x384xf32, 4xi64) + reshape_236 = paddle._C_ops.reshape(reshape_37, full_int_array_42) + del full_int_array_42 + + # builtin.combine: ([4x24x24x384xf32, 4x24x24x384xf32, 4x24x24x384xf32, 4x24x24x384xf32]) <- (4x24x24x384xf32, 4x24x24x384xf32, 4x24x24x384xf32, 4x24x24x384xf32) + combine_1 = [strided_slice_4, strided_slice_5, strided_slice_6, strided_slice_7] + + # pd_op.concat: (4x24x24x1536xf32) <- ([4x24x24x384xf32, 4x24x24x384xf32, 4x24x24x384xf32, 4x24x24x384xf32], 1xi32) + concat_1 = paddle._C_ops.concat(combine_1, full_2) + del combine_1 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_54 = [4, -1, 1536] + + # pd_op.reshape: (4x576x1536xf32) <- (4x24x24x1536xf32, 3xi64) + reshape_38 = paddle._C_ops.reshape(concat_1, full_int_array_54) + del full_int_array_54 + + # pd_op.layer_norm: (4x576x1536xf32, 4x576xf32, 4x576xf32) <- (4x576x1536xf32, 1536xf32, 1536xf32) + layer_norm_30, layer_norm_31, layer_norm_32 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + reshape_38, parameter_249, parameter_248, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_248, parameter_249 + + # pd_op.matmul: (4x576x768xf32) <- (4x576x1536xf32, 1536x768xf32) + matmul_21 = paddle._C_ops.matmul(layer_norm_30, parameter_247, False, False) + del parameter_247 + + # pd_op.layer_norm: (4x576x768xf32, 4x576xf32, 4x576xf32) <- (4x576x768xf32, 768xf32, 768xf32) + layer_norm_33, layer_norm_34, layer_norm_35 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + matmul_21, parameter_246, parameter_245, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_245, parameter_246 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_55 = [4, 24, 24, 768] + + # pd_op.reshape: (4x24x24x768xf32) <- (4x576x768xf32, 4xi64) + reshape_39 = paddle._C_ops.reshape(layer_norm_33, full_int_array_55) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_56 = [4, 2, 12, 2, 12, 768] + + # pd_op.reshape: (4x2x12x2x12x768xf32) <- (4x24x24x768xf32, 6xi64) + reshape_237 = paddle._C_ops.reshape(reshape_39, full_int_array_56) + + # pd_op.transpose: (4x2x2x12x12x768xf32) <- (4x2x12x2x12x768xf32) + transpose_25 = paddle._C_ops.transpose(reshape_237, [0, 1, 3, 2, 4, 5]) + del reshape_237 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_57 = [-1, 12, 12, 768] + + # pd_op.reshape: (16x12x12x768xf32) <- (4x2x2x12x12x768xf32, 4xi64) + reshape_40 = paddle._C_ops.reshape(transpose_25, full_int_array_57) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_58 = [-1, 144, 768] + + # pd_op.reshape: (16x144x768xf32) <- (16x12x12x768xf32, 3xi64) + reshape_41 = paddle._C_ops.reshape(reshape_40, full_int_array_58) + + # pd_op.matmul: (16x144x2304xf32) <- (16x144x768xf32, 768x2304xf32) + matmul_22 = paddle._C_ops.matmul(reshape_41, parameter_244, False, False) + del parameter_244 + + # pd_op.add: (16x144x2304xf32) <- (16x144x2304xf32, 2304xf32) + add_29 = paddle._C_ops.add(matmul_22, parameter_243) + del parameter_243 + + # pd_op.full_int_array: (5xi64) <- () + full_int_array_59 = [16, 144, 3, 24, 32] + + # pd_op.reshape: (16x144x3x24x32xf32) <- (16x144x2304xf32, 5xi64) + reshape_238 = paddle._C_ops.reshape(add_29, full_int_array_59) + + # pd_op.transpose: (3x16x24x144x32xf32) <- (16x144x3x24x32xf32) + transpose_26 = paddle._C_ops.transpose(reshape_238, [2, 0, 3, 1, 4]) + del reshape_238 + + # pd_op.slice: (16x24x144x32xf32) <- (3x16x24x144x32xf32, 1xi64, 1xi64) + slice_32 = paddle._C_ops.slice( + transpose_26, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (16x24x144x32xf32) <- (3x16x24x144x32xf32, 1xi64, 1xi64) + slice_33 = paddle._C_ops.slice( + transpose_26, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (16x24x144x32xf32) <- (3x16x24x144x32xf32, 1xi64, 1xi64) + slice_4 = paddle._C_ops.slice( + transpose_26, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (16x24x144x32xf32) <- (16x24x144x32xf32, 1xf32) + scale_4 = paddle._C_ops.scale(slice_32, full_0, float("0"), True) + del slice_32 + + # pd_op.transpose: (16x24x32x144xf32) <- (16x24x144x32xf32) + transpose_27 = paddle._C_ops.transpose(slice_33, [0, 1, 3, 2]) + del slice_33 + + # pd_op.matmul: (16x24x144x144xf32) <- (16x24x144x32xf32, 16x24x32x144xf32) + matmul_23 = paddle._C_ops.matmul(scale_4, transpose_27, False, False) + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_42 = paddle._C_ops.reshape(data_27, full_int_array_15) + del data_27 + + # pd_op.index_select: (20736x24xf32) <- (529x24xf32, 20736xi64) + index_select_4 = paddle._C_ops.index_select(data_28, reshape_42, 0) + del data_28 + + # pd_op.reshape: (144x144x24xf32) <- (20736x24xf32, 3xi64) + reshape_239 = paddle._C_ops.reshape(index_select_4, full_int_array_16) + + # pd_op.transpose: (24x144x144xf32) <- (144x144x24xf32) + transpose_28 = paddle._C_ops.transpose(reshape_239, [2, 0, 1]) + del reshape_239 + + # pd_op.unsqueeze: (1x24x144x144xf32) <- (24x144x144xf32, 1xi64) + unsqueeze_6 = paddle._C_ops.unsqueeze(transpose_28, full_int_array_0) + + # pd_op.add: (16x24x144x144xf32) <- (16x24x144x144xf32, 1x24x144x144xf32) + add_178 = paddle._C_ops.add(matmul_23, unsqueeze_6) + + # pd_op.softmax: (16x24x144x144xf32) <- (16x24x144x144xf32) + softmax_4 = paddle._C_ops.softmax(add_178, -1) + del add_178 + + # pd_op.matmul: (16x24x144x32xf32) <- (16x24x144x144xf32, 16x24x144x32xf32) + matmul_128 = paddle._C_ops.matmul(softmax_4, slice_4, False, False) + + # pd_op.transpose: (16x144x24x32xf32) <- (16x24x144x32xf32) + transpose_29 = paddle._C_ops.transpose(matmul_128, [0, 2, 1, 3]) + del matmul_128 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_60 = [16, 144, 768] + + # pd_op.reshape: (16x144x768xf32) <- (16x144x24x32xf32, 3xi64) + reshape_43 = paddle._C_ops.reshape(transpose_29, full_int_array_60) + + # pd_op.matmul: (16x144x768xf32) <- (16x144x768xf32, 768x768xf32) + matmul_24 = paddle._C_ops.matmul(reshape_43, parameter_242, False, False) + del parameter_242 + + # pd_op.add: (16x144x768xf32) <- (16x144x768xf32, 768xf32) + add_30 = paddle._C_ops.add(matmul_24, parameter_241) + del parameter_241 + + # pd_op.reshape: (16x12x12x768xf32) <- (16x144x768xf32, 4xi64) + reshape_44 = paddle._C_ops.reshape(add_30, full_int_array_57) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_61 = [-1, 2, 2, 12, 12, 768] + + # pd_op.reshape: (4x2x2x12x12x768xf32) <- (16x12x12x768xf32, 6xi64) + reshape_240 = paddle._C_ops.reshape(reshape_44, full_int_array_61) + + # pd_op.transpose: (4x2x12x2x12x768xf32) <- (4x2x2x12x12x768xf32) + transpose_30 = paddle._C_ops.transpose(reshape_240, [0, 1, 3, 2, 4, 5]) + del reshape_240 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_62 = [-1, 24, 24, 768] + + # pd_op.reshape: (4x24x24x768xf32) <- (4x2x12x2x12x768xf32, 4xi64) + reshape_45 = paddle._C_ops.reshape(transpose_30, full_int_array_62) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_63 = [4, 576, 768] + + # pd_op.reshape: (4x576x768xf32) <- (4x24x24x768xf32, 3xi64) + reshape_46 = paddle._C_ops.reshape(reshape_45, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_5 = paddle._C_ops.full( + [], + float("0.982609"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_62 = full_5 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_6 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_179 = paddle._C_ops.add(full_5, uniform_6) + del uniform_6 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_6 = paddle._C_ops.floor(add_179) + del add_179 + + # pd_op.divide: (4x576x768xf32) <- (4x576x768xf32, xf32) + divide_6 = paddle._C_ops.divide(reshape_46, full_5) + + # pd_op.multiply: (4x576x768xf32) <- (4x576x768xf32, 4x1x1xf32) + multiply_6 = paddle._C_ops.multiply(divide_6, floor_6) + + # pd_op.add: (4x576x768xf32) <- (4x576x768xf32, 4x576x768xf32) + add_31 = paddle._C_ops.add(matmul_21, multiply_6) + + # pd_op.layer_norm: (4x576x768xf32, 4x576xf32, 4x576xf32) <- (4x576x768xf32, 768xf32, 768xf32) + layer_norm_36, layer_norm_37, layer_norm_38 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_31, parameter_240, parameter_239, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_239, parameter_240 + + # pd_op.matmul: (4x576x3072xf32) <- (4x576x768xf32, 768x3072xf32) + matmul_25 = paddle._C_ops.matmul(layer_norm_36, parameter_238, False, False) + del parameter_238 + + # pd_op.add: (4x576x3072xf32) <- (4x576x3072xf32, 3072xf32) + add_32 = paddle._C_ops.add(matmul_25, parameter_237) + del parameter_237 + + # pd_op.gelu: (4x576x3072xf32) <- (4x576x3072xf32) + gelu_4 = paddle._C_ops.gelu(add_32, False) + + # pd_op.matmul: (4x576x768xf32) <- (4x576x3072xf32, 3072x768xf32) + matmul_26 = paddle._C_ops.matmul(gelu_4, parameter_236, False, False) + del parameter_236 + + # pd_op.add: (4x576x768xf32) <- (4x576x768xf32, 768xf32) + add_33 = paddle._C_ops.add(matmul_26, parameter_235) + del parameter_235 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_7 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_180 = paddle._C_ops.add(full_5, uniform_7) + del uniform_7 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_7 = paddle._C_ops.floor(add_180) + del add_180 + + # pd_op.divide: (4x576x768xf32) <- (4x576x768xf32, xf32) + divide_7 = paddle._C_ops.divide(add_33, full_5) + + # pd_op.multiply: (4x576x768xf32) <- (4x576x768xf32, 4x1x1xf32) + multiply_7 = paddle._C_ops.multiply(divide_7, floor_7) + + # pd_op.add: (4x576x768xf32) <- (4x576x768xf32, 4x576x768xf32) + add_34 = paddle._C_ops.add(add_31, multiply_7) + + # pd_op.layer_norm: (4x576x768xf32, 4x576xf32, 4x576xf32) <- (4x576x768xf32, 768xf32, 768xf32) + layer_norm_39, layer_norm_40, layer_norm_41 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_34, parameter_234, parameter_233, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_233, parameter_234 + + # pd_op.reshape: (4x24x24x768xf32) <- (4x576x768xf32, 4xi64) + reshape_47 = paddle._C_ops.reshape(layer_norm_39, full_int_array_55) + + # pd_op.roll: (4x24x24x768xf32) <- (4x24x24x768xf32, 2xi64) + roll_4 = paddle._C_ops.roll(reshape_47, full_int_array_4, [1, 2]) + + # pd_op.reshape: (4x2x12x2x12x768xf32) <- (4x24x24x768xf32, 6xi64) + reshape_241 = paddle._C_ops.reshape(roll_4, full_int_array_56) + + # pd_op.transpose: (4x2x2x12x12x768xf32) <- (4x2x12x2x12x768xf32) + transpose_31 = paddle._C_ops.transpose(reshape_241, [0, 1, 3, 2, 4, 5]) + del reshape_241 + + # pd_op.reshape: (16x12x12x768xf32) <- (4x2x2x12x12x768xf32, 4xi64) + reshape_48 = paddle._C_ops.reshape(transpose_31, full_int_array_57) + + # pd_op.reshape: (16x144x768xf32) <- (16x12x12x768xf32, 3xi64) + reshape_49 = paddle._C_ops.reshape(reshape_48, full_int_array_58) + + # pd_op.full: (1x24x24x1xf32) <- () + full_34 = paddle._C_ops.full( + [1, 24, 24, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__28 = paddle._C_ops.set_value_( + full_34, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_34 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__29 = paddle._C_ops.set_value_( + set_value__28, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__28 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__30 = paddle._C_ops.set_value_( + set_value__29, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__29 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__31 = paddle._C_ops.set_value_( + set_value__30, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__30 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__32 = paddle._C_ops.set_value_( + set_value__31, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__31 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__33 = paddle._C_ops.set_value_( + set_value__32, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__32 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__34 = paddle._C_ops.set_value_( + set_value__33, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__33 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__35 = paddle._C_ops.set_value_( + set_value__34, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__34 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__2 = paddle._C_ops.set_value_( + set_value__35, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__35 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_64 = [1, 2, 12, 2, 12, 1] + + # pd_op.reshape: (1x2x12x2x12x1xf32) <- (1x24x24x1xf32, 6xi64) + reshape_242 = paddle._C_ops.reshape(set_value__2, full_int_array_64) + + # pd_op.transpose: (1x2x2x12x12x1xf32) <- (1x2x12x2x12x1xf32) + transpose_148 = paddle._C_ops.transpose(reshape_242, [0, 1, 3, 2, 4, 5]) + del reshape_242 + + # pd_op.reshape: (4x12x12x1xf32) <- (1x2x2x12x12x1xf32, 4xi64) + reshape_243 = paddle._C_ops.reshape(transpose_148, full_int_array_36) + del transpose_148 + + # pd_op.reshape: (4x144xf32) <- (4x12x12x1xf32, 2xi64) + reshape_244 = paddle._C_ops.reshape(reshape_243, full_int_array_37) + del reshape_243 + + # pd_op.unsqueeze: (4x1x144xf32) <- (4x144xf32, 1xi64) + unsqueeze_43 = paddle._C_ops.unsqueeze(reshape_244, full_int_array_1) + + # pd_op.unsqueeze: (4x144x1xf32) <- (4x144xf32, 1xi64) + unsqueeze_44 = paddle._C_ops.unsqueeze(reshape_244, full_int_array_2) + del reshape_244 + + # pd_op.subtract: (4x144x144xf32) <- (4x1x144xf32, 4x144x1xf32) + subtract_2 = paddle._C_ops.subtract(unsqueeze_43, unsqueeze_44) + del unsqueeze_43, unsqueeze_44 + + # pd_op.not_equal: (4x144x144xb) <- (4x144x144xf32, xf32) + not_equal_2 = paddle._C_ops.not_equal(subtract_2, full_26) + + # pd_op.full: (4x144x144xf32) <- () + full_35 = paddle._C_ops.full( + [4, 144, 144], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_4 = paddle._C_ops.where(not_equal_2, full_35, subtract_2) + del not_equal_2, subtract_2 + + # pd_op.equal: (4x144x144xb) <- (4x144x144xf32, xf32) + equal_2 = paddle._C_ops.equal(where_4, full_26) + + # pd_op.full: (4x144x144xf32) <- () + full_36 = paddle._C_ops.full( + [4, 144, 144], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_5 = paddle._C_ops.where(equal_2, full_36, where_4) + del equal_2, where_4 + + # pd_op.matmul: (16x144x2304xf32) <- (16x144x768xf32, 768x2304xf32) + matmul_27 = paddle._C_ops.matmul(reshape_49, parameter_232, False, False) + del parameter_232 + + # pd_op.add: (16x144x2304xf32) <- (16x144x2304xf32, 2304xf32) + add_35 = paddle._C_ops.add(matmul_27, parameter_231) + del parameter_231 + + # pd_op.reshape: (16x144x3x24x32xf32) <- (16x144x2304xf32, 5xi64) + reshape_245 = paddle._C_ops.reshape(add_35, full_int_array_59) + + # pd_op.transpose: (3x16x24x144x32xf32) <- (16x144x3x24x32xf32) + transpose_32 = paddle._C_ops.transpose(reshape_245, [2, 0, 3, 1, 4]) + del reshape_245 + + # pd_op.slice: (16x24x144x32xf32) <- (3x16x24x144x32xf32, 1xi64, 1xi64) + slice_34 = paddle._C_ops.slice( + transpose_32, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (16x24x144x32xf32) <- (3x16x24x144x32xf32, 1xi64, 1xi64) + slice_35 = paddle._C_ops.slice( + transpose_32, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (16x24x144x32xf32) <- (3x16x24x144x32xf32, 1xi64, 1xi64) + slice_5 = paddle._C_ops.slice( + transpose_32, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (16x24x144x32xf32) <- (16x24x144x32xf32, 1xf32) + scale_5 = paddle._C_ops.scale(slice_34, full_0, float("0"), True) + del slice_34 + + # pd_op.transpose: (16x24x32x144xf32) <- (16x24x144x32xf32) + transpose_33 = paddle._C_ops.transpose(slice_35, [0, 1, 3, 2]) + del slice_35 + + # pd_op.matmul: (16x24x144x144xf32) <- (16x24x144x32xf32, 16x24x32x144xf32) + matmul_28 = paddle._C_ops.matmul(scale_5, transpose_33, False, False) + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_50 = paddle._C_ops.reshape(data_29, full_int_array_15) + del data_29 + + # pd_op.index_select: (20736x24xf32) <- (529x24xf32, 20736xi64) + index_select_5 = paddle._C_ops.index_select(data_30, reshape_50, 0) + del data_30 + + # pd_op.reshape: (144x144x24xf32) <- (20736x24xf32, 3xi64) + reshape_246 = paddle._C_ops.reshape(index_select_5, full_int_array_16) + + # pd_op.transpose: (24x144x144xf32) <- (144x144x24xf32) + transpose_34 = paddle._C_ops.transpose(reshape_246, [2, 0, 1]) + del reshape_246 + + # pd_op.unsqueeze: (1x24x144x144xf32) <- (24x144x144xf32, 1xi64) + unsqueeze_7 = paddle._C_ops.unsqueeze(transpose_34, full_int_array_0) + + # pd_op.add: (16x24x144x144xf32) <- (16x24x144x144xf32, 1x24x144x144xf32) + add_36 = paddle._C_ops.add(matmul_28, unsqueeze_7) + + # pd_op.full_int_array: (5xi64) <- () + full_int_array_65 = [4, 4, 24, 144, 144] + + # pd_op.reshape: (4x4x24x144x144xf32) <- (16x24x144x144xf32, 5xi64) + reshape_51 = paddle._C_ops.reshape(add_36, full_int_array_65) + + # pd_op.unsqueeze: (4x1x144x144xf32) <- (4x144x144xf32, 1xi64) + unsqueeze_45 = paddle._C_ops.unsqueeze(where_5, full_int_array_1) + del where_5 + + # pd_op.unsqueeze: (1x4x1x144x144xf32) <- (4x1x144x144xf32, 1xi64) + unsqueeze_8 = paddle._C_ops.unsqueeze(unsqueeze_45, full_int_array_0) + del unsqueeze_45 + + # pd_op.add: (4x4x24x144x144xf32) <- (4x4x24x144x144xf32, 1x4x1x144x144xf32) + add_37 = paddle._C_ops.add(reshape_51, unsqueeze_8) + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_66 = [16, 24, 144, 144] + + # pd_op.reshape: (16x24x144x144xf32) <- (4x4x24x144x144xf32, 4xi64) + reshape_247 = paddle._C_ops.reshape(add_37, full_int_array_66) + + # pd_op.softmax: (16x24x144x144xf32) <- (16x24x144x144xf32) + softmax_5 = paddle._C_ops.softmax(reshape_247, -1) + del reshape_247 + + # pd_op.matmul: (16x24x144x32xf32) <- (16x24x144x144xf32, 16x24x144x32xf32) + matmul_129 = paddle._C_ops.matmul(softmax_5, slice_5, False, False) + + # pd_op.transpose: (16x144x24x32xf32) <- (16x24x144x32xf32) + transpose_35 = paddle._C_ops.transpose(matmul_129, [0, 2, 1, 3]) + del matmul_129 + + # pd_op.reshape: (16x144x768xf32) <- (16x144x24x32xf32, 3xi64) + reshape_52 = paddle._C_ops.reshape(transpose_35, full_int_array_60) + + # pd_op.matmul: (16x144x768xf32) <- (16x144x768xf32, 768x768xf32) + matmul_29 = paddle._C_ops.matmul(reshape_52, parameter_230, False, False) + del parameter_230 + + # pd_op.add: (16x144x768xf32) <- (16x144x768xf32, 768xf32) + add_38 = paddle._C_ops.add(matmul_29, parameter_229) + del parameter_229 + + # pd_op.reshape: (16x12x12x768xf32) <- (16x144x768xf32, 4xi64) + reshape_53 = paddle._C_ops.reshape(add_38, full_int_array_57) + + # pd_op.reshape: (4x2x2x12x12x768xf32) <- (16x12x12x768xf32, 6xi64) + reshape_248 = paddle._C_ops.reshape(reshape_53, full_int_array_61) + + # pd_op.transpose: (4x2x12x2x12x768xf32) <- (4x2x2x12x12x768xf32) + transpose_36 = paddle._C_ops.transpose(reshape_248, [0, 1, 3, 2, 4, 5]) + del reshape_248 + + # pd_op.reshape: (4x24x24x768xf32) <- (4x2x12x2x12x768xf32, 4xi64) + reshape_54 = paddle._C_ops.reshape(transpose_36, full_int_array_62) + + # pd_op.roll: (4x24x24x768xf32) <- (4x24x24x768xf32, 2xi64) + roll_5 = paddle._C_ops.roll(reshape_54, full_int_array_5, [1, 2]) + + # pd_op.reshape: (4x576x768xf32) <- (4x24x24x768xf32, 3xi64) + reshape_55 = paddle._C_ops.reshape(roll_5, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_6 = paddle._C_ops.full( + [], + float("0.978261"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_73 = full_6 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_8 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_181 = paddle._C_ops.add(full_6, uniform_8) + del uniform_8 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_8 = paddle._C_ops.floor(add_181) + del add_181 + + # pd_op.divide: (4x576x768xf32) <- (4x576x768xf32, xf32) + divide_8 = paddle._C_ops.divide(reshape_55, full_6) + + # pd_op.multiply: (4x576x768xf32) <- (4x576x768xf32, 4x1x1xf32) + multiply_8 = paddle._C_ops.multiply(divide_8, floor_8) + + # pd_op.add: (4x576x768xf32) <- (4x576x768xf32, 4x576x768xf32) + add_39 = paddle._C_ops.add(add_34, multiply_8) + + # pd_op.layer_norm: (4x576x768xf32, 4x576xf32, 4x576xf32) <- (4x576x768xf32, 768xf32, 768xf32) + layer_norm_42, layer_norm_43, layer_norm_44 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_39, parameter_228, parameter_227, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_227, parameter_228 + + # pd_op.matmul: (4x576x3072xf32) <- (4x576x768xf32, 768x3072xf32) + matmul_30 = paddle._C_ops.matmul(layer_norm_42, parameter_226, False, False) + del parameter_226 + + # pd_op.add: (4x576x3072xf32) <- (4x576x3072xf32, 3072xf32) + add_40 = paddle._C_ops.add(matmul_30, parameter_225) + del parameter_225 + + # pd_op.gelu: (4x576x3072xf32) <- (4x576x3072xf32) + gelu_5 = paddle._C_ops.gelu(add_40, False) + + # pd_op.matmul: (4x576x768xf32) <- (4x576x3072xf32, 3072x768xf32) + matmul_31 = paddle._C_ops.matmul(gelu_5, parameter_224, False, False) + del parameter_224 + + # pd_op.add: (4x576x768xf32) <- (4x576x768xf32, 768xf32) + add_41 = paddle._C_ops.add(matmul_31, parameter_223) + del parameter_223 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_9 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_182 = paddle._C_ops.add(full_6, uniform_9) + del uniform_9 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_9 = paddle._C_ops.floor(add_182) + del add_182 + + # pd_op.divide: (4x576x768xf32) <- (4x576x768xf32, xf32) + divide_9 = paddle._C_ops.divide(add_41, full_6) + + # pd_op.multiply: (4x576x768xf32) <- (4x576x768xf32, 4x1x1xf32) + multiply_9 = paddle._C_ops.multiply(divide_9, floor_9) + + # pd_op.add: (4x576x768xf32) <- (4x576x768xf32, 4x576x768xf32) + add_42 = paddle._C_ops.add(add_39, multiply_9) + + # pd_op.layer_norm: (4x576x768xf32, 4x576xf32, 4x576xf32) <- (4x576x768xf32, 768xf32, 768xf32) + layer_norm_45, layer_norm_46, layer_norm_47 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_42, parameter_222, parameter_221, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_221, parameter_222 + + # pd_op.reshape: (4x24x24x768xf32) <- (4x576x768xf32, 4xi64) + reshape_56 = paddle._C_ops.reshape(layer_norm_45, full_int_array_55) + + # pd_op.reshape: (4x2x12x2x12x768xf32) <- (4x24x24x768xf32, 6xi64) + reshape_249 = paddle._C_ops.reshape(reshape_56, full_int_array_56) + + # pd_op.transpose: (4x2x2x12x12x768xf32) <- (4x2x12x2x12x768xf32) + transpose_37 = paddle._C_ops.transpose(reshape_249, [0, 1, 3, 2, 4, 5]) + del reshape_249 + + # pd_op.reshape: (16x12x12x768xf32) <- (4x2x2x12x12x768xf32, 4xi64) + reshape_57 = paddle._C_ops.reshape(transpose_37, full_int_array_57) + + # pd_op.reshape: (16x144x768xf32) <- (16x12x12x768xf32, 3xi64) + reshape_58 = paddle._C_ops.reshape(reshape_57, full_int_array_58) + + # pd_op.matmul: (16x144x2304xf32) <- (16x144x768xf32, 768x2304xf32) + matmul_32 = paddle._C_ops.matmul(reshape_58, parameter_220, False, False) + del parameter_220 + + # pd_op.add: (16x144x2304xf32) <- (16x144x2304xf32, 2304xf32) + add_43 = paddle._C_ops.add(matmul_32, parameter_219) + del parameter_219 + + # pd_op.reshape: (16x144x3x24x32xf32) <- (16x144x2304xf32, 5xi64) + reshape_250 = paddle._C_ops.reshape(add_43, full_int_array_59) + + # pd_op.transpose: (3x16x24x144x32xf32) <- (16x144x3x24x32xf32) + transpose_38 = paddle._C_ops.transpose(reshape_250, [2, 0, 3, 1, 4]) + del reshape_250 + + # pd_op.slice: (16x24x144x32xf32) <- (3x16x24x144x32xf32, 1xi64, 1xi64) + slice_36 = paddle._C_ops.slice( + transpose_38, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (16x24x144x32xf32) <- (3x16x24x144x32xf32, 1xi64, 1xi64) + slice_37 = paddle._C_ops.slice( + transpose_38, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (16x24x144x32xf32) <- (3x16x24x144x32xf32, 1xi64, 1xi64) + slice_6 = paddle._C_ops.slice( + transpose_38, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (16x24x144x32xf32) <- (16x24x144x32xf32, 1xf32) + scale_6 = paddle._C_ops.scale(slice_36, full_0, float("0"), True) + del slice_36 + + # pd_op.transpose: (16x24x32x144xf32) <- (16x24x144x32xf32) + transpose_39 = paddle._C_ops.transpose(slice_37, [0, 1, 3, 2]) + del slice_37 + + # pd_op.matmul: (16x24x144x144xf32) <- (16x24x144x32xf32, 16x24x32x144xf32) + matmul_33 = paddle._C_ops.matmul(scale_6, transpose_39, False, False) + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_59 = paddle._C_ops.reshape(data_31, full_int_array_15) + del data_31 + + # pd_op.index_select: (20736x24xf32) <- (529x24xf32, 20736xi64) + index_select_6 = paddle._C_ops.index_select(data_32, reshape_59, 0) + del data_32 + + # pd_op.reshape: (144x144x24xf32) <- (20736x24xf32, 3xi64) + reshape_251 = paddle._C_ops.reshape(index_select_6, full_int_array_16) + + # pd_op.transpose: (24x144x144xf32) <- (144x144x24xf32) + transpose_40 = paddle._C_ops.transpose(reshape_251, [2, 0, 1]) + del reshape_251 + + # pd_op.unsqueeze: (1x24x144x144xf32) <- (24x144x144xf32, 1xi64) + unsqueeze_9 = paddle._C_ops.unsqueeze(transpose_40, full_int_array_0) + + # pd_op.add: (16x24x144x144xf32) <- (16x24x144x144xf32, 1x24x144x144xf32) + add_183 = paddle._C_ops.add(matmul_33, unsqueeze_9) + + # pd_op.softmax: (16x24x144x144xf32) <- (16x24x144x144xf32) + softmax_6 = paddle._C_ops.softmax(add_183, -1) + del add_183 + + # pd_op.matmul: (16x24x144x32xf32) <- (16x24x144x144xf32, 16x24x144x32xf32) + matmul_130 = paddle._C_ops.matmul(softmax_6, slice_6, False, False) + + # pd_op.transpose: (16x144x24x32xf32) <- (16x24x144x32xf32) + transpose_41 = paddle._C_ops.transpose(matmul_130, [0, 2, 1, 3]) + del matmul_130 + + # pd_op.reshape: (16x144x768xf32) <- (16x144x24x32xf32, 3xi64) + reshape_60 = paddle._C_ops.reshape(transpose_41, full_int_array_60) + + # pd_op.matmul: (16x144x768xf32) <- (16x144x768xf32, 768x768xf32) + matmul_34 = paddle._C_ops.matmul(reshape_60, parameter_218, False, False) + del parameter_218 + + # pd_op.add: (16x144x768xf32) <- (16x144x768xf32, 768xf32) + add_44 = paddle._C_ops.add(matmul_34, parameter_217) + del parameter_217 + + # pd_op.reshape: (16x12x12x768xf32) <- (16x144x768xf32, 4xi64) + reshape_61 = paddle._C_ops.reshape(add_44, full_int_array_57) + + # pd_op.reshape: (4x2x2x12x12x768xf32) <- (16x12x12x768xf32, 6xi64) + reshape_252 = paddle._C_ops.reshape(reshape_61, full_int_array_61) + + # pd_op.transpose: (4x2x12x2x12x768xf32) <- (4x2x2x12x12x768xf32) + transpose_42 = paddle._C_ops.transpose(reshape_252, [0, 1, 3, 2, 4, 5]) + del reshape_252 + + # pd_op.reshape: (4x24x24x768xf32) <- (4x2x12x2x12x768xf32, 4xi64) + reshape_62 = paddle._C_ops.reshape(transpose_42, full_int_array_62) + + # pd_op.reshape: (4x576x768xf32) <- (4x24x24x768xf32, 3xi64) + reshape_63 = paddle._C_ops.reshape(reshape_62, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_7 = paddle._C_ops.full( + [], + float("0.973913"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_82 = full_7 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_10 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_184 = paddle._C_ops.add(full_7, uniform_10) + del uniform_10 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_10 = paddle._C_ops.floor(add_184) + del add_184 + + # pd_op.divide: (4x576x768xf32) <- (4x576x768xf32, xf32) + divide_10 = paddle._C_ops.divide(reshape_63, full_7) + + # pd_op.multiply: (4x576x768xf32) <- (4x576x768xf32, 4x1x1xf32) + multiply_10 = paddle._C_ops.multiply(divide_10, floor_10) + + # pd_op.add: (4x576x768xf32) <- (4x576x768xf32, 4x576x768xf32) + add_45 = paddle._C_ops.add(add_42, multiply_10) + + # pd_op.layer_norm: (4x576x768xf32, 4x576xf32, 4x576xf32) <- (4x576x768xf32, 768xf32, 768xf32) + layer_norm_48, layer_norm_49, layer_norm_50 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_45, parameter_216, parameter_215, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_215, parameter_216 + + # pd_op.matmul: (4x576x3072xf32) <- (4x576x768xf32, 768x3072xf32) + matmul_35 = paddle._C_ops.matmul(layer_norm_48, parameter_214, False, False) + del parameter_214 + + # pd_op.add: (4x576x3072xf32) <- (4x576x3072xf32, 3072xf32) + add_46 = paddle._C_ops.add(matmul_35, parameter_213) + del parameter_213 + + # pd_op.gelu: (4x576x3072xf32) <- (4x576x3072xf32) + gelu_6 = paddle._C_ops.gelu(add_46, False) + + # pd_op.matmul: (4x576x768xf32) <- (4x576x3072xf32, 3072x768xf32) + matmul_36 = paddle._C_ops.matmul(gelu_6, parameter_212, False, False) + del parameter_212 + + # pd_op.add: (4x576x768xf32) <- (4x576x768xf32, 768xf32) + add_47 = paddle._C_ops.add(matmul_36, parameter_211) + del parameter_211 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_11 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_185 = paddle._C_ops.add(full_7, uniform_11) + del uniform_11 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_11 = paddle._C_ops.floor(add_185) + del add_185 + + # pd_op.divide: (4x576x768xf32) <- (4x576x768xf32, xf32) + divide_11 = paddle._C_ops.divide(add_47, full_7) + + # pd_op.multiply: (4x576x768xf32) <- (4x576x768xf32, 4x1x1xf32) + multiply_11 = paddle._C_ops.multiply(divide_11, floor_11) + + # pd_op.add: (4x576x768xf32) <- (4x576x768xf32, 4x576x768xf32) + add_48 = paddle._C_ops.add(add_45, multiply_11) + + # pd_op.layer_norm: (4x576x768xf32, 4x576xf32, 4x576xf32) <- (4x576x768xf32, 768xf32, 768xf32) + layer_norm_51, layer_norm_52, layer_norm_53 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_48, parameter_210, parameter_209, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_209, parameter_210 + + # pd_op.reshape: (4x24x24x768xf32) <- (4x576x768xf32, 4xi64) + reshape_64 = paddle._C_ops.reshape(layer_norm_51, full_int_array_55) + + # pd_op.roll: (4x24x24x768xf32) <- (4x24x24x768xf32, 2xi64) + roll_6 = paddle._C_ops.roll(reshape_64, full_int_array_4, [1, 2]) + + # pd_op.reshape: (4x2x12x2x12x768xf32) <- (4x24x24x768xf32, 6xi64) + reshape_253 = paddle._C_ops.reshape(roll_6, full_int_array_56) + + # pd_op.transpose: (4x2x2x12x12x768xf32) <- (4x2x12x2x12x768xf32) + transpose_43 = paddle._C_ops.transpose(reshape_253, [0, 1, 3, 2, 4, 5]) + del reshape_253 + + # pd_op.reshape: (16x12x12x768xf32) <- (4x2x2x12x12x768xf32, 4xi64) + reshape_65 = paddle._C_ops.reshape(transpose_43, full_int_array_57) + + # pd_op.reshape: (16x144x768xf32) <- (16x12x12x768xf32, 3xi64) + reshape_66 = paddle._C_ops.reshape(reshape_65, full_int_array_58) + + # pd_op.full: (1x24x24x1xf32) <- () + full_37 = paddle._C_ops.full( + [1, 24, 24, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__36 = paddle._C_ops.set_value_( + full_37, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_37 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__37 = paddle._C_ops.set_value_( + set_value__36, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__36 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__38 = paddle._C_ops.set_value_( + set_value__37, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__37 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__39 = paddle._C_ops.set_value_( + set_value__38, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__38 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__40 = paddle._C_ops.set_value_( + set_value__39, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__39 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__41 = paddle._C_ops.set_value_( + set_value__40, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__40 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__42 = paddle._C_ops.set_value_( + set_value__41, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__41 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__43 = paddle._C_ops.set_value_( + set_value__42, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__42 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__3 = paddle._C_ops.set_value_( + set_value__43, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__43 + + # pd_op.reshape: (1x2x12x2x12x1xf32) <- (1x24x24x1xf32, 6xi64) + reshape_254 = paddle._C_ops.reshape(set_value__3, full_int_array_64) + + # pd_op.transpose: (1x2x2x12x12x1xf32) <- (1x2x12x2x12x1xf32) + transpose_149 = paddle._C_ops.transpose(reshape_254, [0, 1, 3, 2, 4, 5]) + del reshape_254 + + # pd_op.reshape: (4x12x12x1xf32) <- (1x2x2x12x12x1xf32, 4xi64) + reshape_255 = paddle._C_ops.reshape(transpose_149, full_int_array_36) + del transpose_149 + + # pd_op.reshape: (4x144xf32) <- (4x12x12x1xf32, 2xi64) + reshape_256 = paddle._C_ops.reshape(reshape_255, full_int_array_37) + del reshape_255 + + # pd_op.unsqueeze: (4x1x144xf32) <- (4x144xf32, 1xi64) + unsqueeze_46 = paddle._C_ops.unsqueeze(reshape_256, full_int_array_1) + + # pd_op.unsqueeze: (4x144x1xf32) <- (4x144xf32, 1xi64) + unsqueeze_47 = paddle._C_ops.unsqueeze(reshape_256, full_int_array_2) + del reshape_256 + + # pd_op.subtract: (4x144x144xf32) <- (4x1x144xf32, 4x144x1xf32) + subtract_3 = paddle._C_ops.subtract(unsqueeze_46, unsqueeze_47) + del unsqueeze_46, unsqueeze_47 + + # pd_op.not_equal: (4x144x144xb) <- (4x144x144xf32, xf32) + not_equal_3 = paddle._C_ops.not_equal(subtract_3, full_26) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_6 = paddle._C_ops.where(not_equal_3, full_35, subtract_3) + del not_equal_3, subtract_3 + + # pd_op.equal: (4x144x144xb) <- (4x144x144xf32, xf32) + equal_3 = paddle._C_ops.equal(where_6, full_26) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_7 = paddle._C_ops.where(equal_3, full_36, where_6) + del equal_3, where_6 + + # pd_op.matmul: (16x144x2304xf32) <- (16x144x768xf32, 768x2304xf32) + matmul_37 = paddle._C_ops.matmul(reshape_66, parameter_208, False, False) + del parameter_208 + + # pd_op.add: (16x144x2304xf32) <- (16x144x2304xf32, 2304xf32) + add_49 = paddle._C_ops.add(matmul_37, parameter_207) + del parameter_207 + + # pd_op.reshape: (16x144x3x24x32xf32) <- (16x144x2304xf32, 5xi64) + reshape_257 = paddle._C_ops.reshape(add_49, full_int_array_59) + + # pd_op.transpose: (3x16x24x144x32xf32) <- (16x144x3x24x32xf32) + transpose_44 = paddle._C_ops.transpose(reshape_257, [2, 0, 3, 1, 4]) + del reshape_257 + + # pd_op.slice: (16x24x144x32xf32) <- (3x16x24x144x32xf32, 1xi64, 1xi64) + slice_38 = paddle._C_ops.slice( + transpose_44, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (16x24x144x32xf32) <- (3x16x24x144x32xf32, 1xi64, 1xi64) + slice_39 = paddle._C_ops.slice( + transpose_44, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (16x24x144x32xf32) <- (3x16x24x144x32xf32, 1xi64, 1xi64) + slice_7 = paddle._C_ops.slice( + transpose_44, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (16x24x144x32xf32) <- (16x24x144x32xf32, 1xf32) + scale_7 = paddle._C_ops.scale(slice_38, full_0, float("0"), True) + del slice_38 + + # pd_op.transpose: (16x24x32x144xf32) <- (16x24x144x32xf32) + transpose_45 = paddle._C_ops.transpose(slice_39, [0, 1, 3, 2]) + del slice_39 + + # pd_op.matmul: (16x24x144x144xf32) <- (16x24x144x32xf32, 16x24x32x144xf32) + matmul_38 = paddle._C_ops.matmul(scale_7, transpose_45, False, False) + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_67 = paddle._C_ops.reshape(data_33, full_int_array_15) + del data_33 + + # pd_op.index_select: (20736x24xf32) <- (529x24xf32, 20736xi64) + index_select_7 = paddle._C_ops.index_select(data_34, reshape_67, 0) + del data_34 + + # pd_op.reshape: (144x144x24xf32) <- (20736x24xf32, 3xi64) + reshape_258 = paddle._C_ops.reshape(index_select_7, full_int_array_16) + + # pd_op.transpose: (24x144x144xf32) <- (144x144x24xf32) + transpose_46 = paddle._C_ops.transpose(reshape_258, [2, 0, 1]) + del reshape_258 + + # pd_op.unsqueeze: (1x24x144x144xf32) <- (24x144x144xf32, 1xi64) + unsqueeze_10 = paddle._C_ops.unsqueeze(transpose_46, full_int_array_0) + + # pd_op.add: (16x24x144x144xf32) <- (16x24x144x144xf32, 1x24x144x144xf32) + add_50 = paddle._C_ops.add(matmul_38, unsqueeze_10) + + # pd_op.reshape: (4x4x24x144x144xf32) <- (16x24x144x144xf32, 5xi64) + reshape_68 = paddle._C_ops.reshape(add_50, full_int_array_65) + + # pd_op.unsqueeze: (4x1x144x144xf32) <- (4x144x144xf32, 1xi64) + unsqueeze_48 = paddle._C_ops.unsqueeze(where_7, full_int_array_1) + del where_7 + + # pd_op.unsqueeze: (1x4x1x144x144xf32) <- (4x1x144x144xf32, 1xi64) + unsqueeze_11 = paddle._C_ops.unsqueeze(unsqueeze_48, full_int_array_0) + del unsqueeze_48 + + # pd_op.add: (4x4x24x144x144xf32) <- (4x4x24x144x144xf32, 1x4x1x144x144xf32) + add_51 = paddle._C_ops.add(reshape_68, unsqueeze_11) + + # pd_op.reshape: (16x24x144x144xf32) <- (4x4x24x144x144xf32, 4xi64) + reshape_259 = paddle._C_ops.reshape(add_51, full_int_array_66) + + # pd_op.softmax: (16x24x144x144xf32) <- (16x24x144x144xf32) + softmax_7 = paddle._C_ops.softmax(reshape_259, -1) + del reshape_259 + + # pd_op.matmul: (16x24x144x32xf32) <- (16x24x144x144xf32, 16x24x144x32xf32) + matmul_131 = paddle._C_ops.matmul(softmax_7, slice_7, False, False) + + # pd_op.transpose: (16x144x24x32xf32) <- (16x24x144x32xf32) + transpose_47 = paddle._C_ops.transpose(matmul_131, [0, 2, 1, 3]) + del matmul_131 + + # pd_op.reshape: (16x144x768xf32) <- (16x144x24x32xf32, 3xi64) + reshape_69 = paddle._C_ops.reshape(transpose_47, full_int_array_60) + + # pd_op.matmul: (16x144x768xf32) <- (16x144x768xf32, 768x768xf32) + matmul_39 = paddle._C_ops.matmul(reshape_69, parameter_206, False, False) + del parameter_206 + + # pd_op.add: (16x144x768xf32) <- (16x144x768xf32, 768xf32) + add_52 = paddle._C_ops.add(matmul_39, parameter_205) + del parameter_205 + + # pd_op.reshape: (16x12x12x768xf32) <- (16x144x768xf32, 4xi64) + reshape_70 = paddle._C_ops.reshape(add_52, full_int_array_57) + + # pd_op.reshape: (4x2x2x12x12x768xf32) <- (16x12x12x768xf32, 6xi64) + reshape_260 = paddle._C_ops.reshape(reshape_70, full_int_array_61) + + # pd_op.transpose: (4x2x12x2x12x768xf32) <- (4x2x2x12x12x768xf32) + transpose_48 = paddle._C_ops.transpose(reshape_260, [0, 1, 3, 2, 4, 5]) + del reshape_260 + + # pd_op.reshape: (4x24x24x768xf32) <- (4x2x12x2x12x768xf32, 4xi64) + reshape_71 = paddle._C_ops.reshape(transpose_48, full_int_array_62) + + # pd_op.roll: (4x24x24x768xf32) <- (4x24x24x768xf32, 2xi64) + roll_7 = paddle._C_ops.roll(reshape_71, full_int_array_5, [1, 2]) + + # pd_op.reshape: (4x576x768xf32) <- (4x24x24x768xf32, 3xi64) + reshape_72 = paddle._C_ops.reshape(roll_7, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_8 = paddle._C_ops.full( + [], + float("0.969565"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_93 = full_8 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_12 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_186 = paddle._C_ops.add(full_8, uniform_12) + del uniform_12 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_12 = paddle._C_ops.floor(add_186) + del add_186 + + # pd_op.divide: (4x576x768xf32) <- (4x576x768xf32, xf32) + divide_12 = paddle._C_ops.divide(reshape_72, full_8) + + # pd_op.multiply: (4x576x768xf32) <- (4x576x768xf32, 4x1x1xf32) + multiply_12 = paddle._C_ops.multiply(divide_12, floor_12) + + # pd_op.add: (4x576x768xf32) <- (4x576x768xf32, 4x576x768xf32) + add_53 = paddle._C_ops.add(add_48, multiply_12) + + # pd_op.layer_norm: (4x576x768xf32, 4x576xf32, 4x576xf32) <- (4x576x768xf32, 768xf32, 768xf32) + layer_norm_54, layer_norm_55, layer_norm_56 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_53, parameter_204, parameter_203, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_203, parameter_204 + + # pd_op.matmul: (4x576x3072xf32) <- (4x576x768xf32, 768x3072xf32) + matmul_40 = paddle._C_ops.matmul(layer_norm_54, parameter_202, False, False) + del parameter_202 + + # pd_op.add: (4x576x3072xf32) <- (4x576x3072xf32, 3072xf32) + add_54 = paddle._C_ops.add(matmul_40, parameter_201) + del parameter_201 + + # pd_op.gelu: (4x576x3072xf32) <- (4x576x3072xf32) + gelu_7 = paddle._C_ops.gelu(add_54, False) + + # pd_op.matmul: (4x576x768xf32) <- (4x576x3072xf32, 3072x768xf32) + matmul_41 = paddle._C_ops.matmul(gelu_7, parameter_200, False, False) + del parameter_200 + + # pd_op.add: (4x576x768xf32) <- (4x576x768xf32, 768xf32) + add_55 = paddle._C_ops.add(matmul_41, parameter_199) + del parameter_199 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_13 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_187 = paddle._C_ops.add(full_8, uniform_13) + del uniform_13 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_13 = paddle._C_ops.floor(add_187) + del add_187 + + # pd_op.divide: (4x576x768xf32) <- (4x576x768xf32, xf32) + divide_13 = paddle._C_ops.divide(add_55, full_8) + + # pd_op.multiply: (4x576x768xf32) <- (4x576x768xf32, 4x1x1xf32) + multiply_13 = paddle._C_ops.multiply(divide_13, floor_13) + + # pd_op.add: (4x576x768xf32) <- (4x576x768xf32, 4x576x768xf32) + add_56 = paddle._C_ops.add(add_53, multiply_13) + + # pd_op.layer_norm: (4x576x768xf32, 4x576xf32, 4x576xf32) <- (4x576x768xf32, 768xf32, 768xf32) + layer_norm_57, layer_norm_58, layer_norm_59 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_56, parameter_198, parameter_197, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_197, parameter_198 + + # pd_op.reshape: (4x24x24x768xf32) <- (4x576x768xf32, 4xi64) + reshape_73 = paddle._C_ops.reshape(layer_norm_57, full_int_array_55) + + # pd_op.reshape: (4x2x12x2x12x768xf32) <- (4x24x24x768xf32, 6xi64) + reshape_261 = paddle._C_ops.reshape(reshape_73, full_int_array_56) + + # pd_op.transpose: (4x2x2x12x12x768xf32) <- (4x2x12x2x12x768xf32) + transpose_49 = paddle._C_ops.transpose(reshape_261, [0, 1, 3, 2, 4, 5]) + del reshape_261 + + # pd_op.reshape: (16x12x12x768xf32) <- (4x2x2x12x12x768xf32, 4xi64) + reshape_74 = paddle._C_ops.reshape(transpose_49, full_int_array_57) + + # pd_op.reshape: (16x144x768xf32) <- (16x12x12x768xf32, 3xi64) + reshape_75 = paddle._C_ops.reshape(reshape_74, full_int_array_58) + + # pd_op.matmul: (16x144x2304xf32) <- (16x144x768xf32, 768x2304xf32) + matmul_42 = paddle._C_ops.matmul(reshape_75, parameter_196, False, False) + del parameter_196 + + # pd_op.add: (16x144x2304xf32) <- (16x144x2304xf32, 2304xf32) + add_57 = paddle._C_ops.add(matmul_42, parameter_195) + del parameter_195 + + # pd_op.reshape: (16x144x3x24x32xf32) <- (16x144x2304xf32, 5xi64) + reshape_262 = paddle._C_ops.reshape(add_57, full_int_array_59) + + # pd_op.transpose: (3x16x24x144x32xf32) <- (16x144x3x24x32xf32) + transpose_50 = paddle._C_ops.transpose(reshape_262, [2, 0, 3, 1, 4]) + del reshape_262 + + # pd_op.slice: (16x24x144x32xf32) <- (3x16x24x144x32xf32, 1xi64, 1xi64) + slice_40 = paddle._C_ops.slice( + transpose_50, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (16x24x144x32xf32) <- (3x16x24x144x32xf32, 1xi64, 1xi64) + slice_41 = paddle._C_ops.slice( + transpose_50, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (16x24x144x32xf32) <- (3x16x24x144x32xf32, 1xi64, 1xi64) + slice_8 = paddle._C_ops.slice( + transpose_50, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (16x24x144x32xf32) <- (16x24x144x32xf32, 1xf32) + scale_8 = paddle._C_ops.scale(slice_40, full_0, float("0"), True) + del slice_40 + + # pd_op.transpose: (16x24x32x144xf32) <- (16x24x144x32xf32) + transpose_51 = paddle._C_ops.transpose(slice_41, [0, 1, 3, 2]) + del slice_41 + + # pd_op.matmul: (16x24x144x144xf32) <- (16x24x144x32xf32, 16x24x32x144xf32) + matmul_43 = paddle._C_ops.matmul(scale_8, transpose_51, False, False) + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_76 = paddle._C_ops.reshape(data_35, full_int_array_15) + del data_35 + + # pd_op.index_select: (20736x24xf32) <- (529x24xf32, 20736xi64) + index_select_8 = paddle._C_ops.index_select(data_36, reshape_76, 0) + del data_36 + + # pd_op.reshape: (144x144x24xf32) <- (20736x24xf32, 3xi64) + reshape_263 = paddle._C_ops.reshape(index_select_8, full_int_array_16) + + # pd_op.transpose: (24x144x144xf32) <- (144x144x24xf32) + transpose_52 = paddle._C_ops.transpose(reshape_263, [2, 0, 1]) + del reshape_263 + + # pd_op.unsqueeze: (1x24x144x144xf32) <- (24x144x144xf32, 1xi64) + unsqueeze_12 = paddle._C_ops.unsqueeze(transpose_52, full_int_array_0) + + # pd_op.add: (16x24x144x144xf32) <- (16x24x144x144xf32, 1x24x144x144xf32) + add_188 = paddle._C_ops.add(matmul_43, unsqueeze_12) + + # pd_op.softmax: (16x24x144x144xf32) <- (16x24x144x144xf32) + softmax_8 = paddle._C_ops.softmax(add_188, -1) + del add_188 + + # pd_op.matmul: (16x24x144x32xf32) <- (16x24x144x144xf32, 16x24x144x32xf32) + matmul_132 = paddle._C_ops.matmul(softmax_8, slice_8, False, False) + + # pd_op.transpose: (16x144x24x32xf32) <- (16x24x144x32xf32) + transpose_53 = paddle._C_ops.transpose(matmul_132, [0, 2, 1, 3]) + del matmul_132 + + # pd_op.reshape: (16x144x768xf32) <- (16x144x24x32xf32, 3xi64) + reshape_77 = paddle._C_ops.reshape(transpose_53, full_int_array_60) + + # pd_op.matmul: (16x144x768xf32) <- (16x144x768xf32, 768x768xf32) + matmul_44 = paddle._C_ops.matmul(reshape_77, parameter_194, False, False) + del parameter_194 + + # pd_op.add: (16x144x768xf32) <- (16x144x768xf32, 768xf32) + add_58 = paddle._C_ops.add(matmul_44, parameter_193) + del parameter_193 + + # pd_op.reshape: (16x12x12x768xf32) <- (16x144x768xf32, 4xi64) + reshape_78 = paddle._C_ops.reshape(add_58, full_int_array_57) + + # pd_op.reshape: (4x2x2x12x12x768xf32) <- (16x12x12x768xf32, 6xi64) + reshape_264 = paddle._C_ops.reshape(reshape_78, full_int_array_61) + + # pd_op.transpose: (4x2x12x2x12x768xf32) <- (4x2x2x12x12x768xf32) + transpose_54 = paddle._C_ops.transpose(reshape_264, [0, 1, 3, 2, 4, 5]) + del reshape_264 + + # pd_op.reshape: (4x24x24x768xf32) <- (4x2x12x2x12x768xf32, 4xi64) + reshape_79 = paddle._C_ops.reshape(transpose_54, full_int_array_62) + + # pd_op.reshape: (4x576x768xf32) <- (4x24x24x768xf32, 3xi64) + reshape_80 = paddle._C_ops.reshape(reshape_79, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_9 = paddle._C_ops.full( + [], + float("0.965217"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_102 = full_9 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_14 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_189 = paddle._C_ops.add(full_9, uniform_14) + del uniform_14 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_14 = paddle._C_ops.floor(add_189) + del add_189 + + # pd_op.divide: (4x576x768xf32) <- (4x576x768xf32, xf32) + divide_14 = paddle._C_ops.divide(reshape_80, full_9) + + # pd_op.multiply: (4x576x768xf32) <- (4x576x768xf32, 4x1x1xf32) + multiply_14 = paddle._C_ops.multiply(divide_14, floor_14) + + # pd_op.add: (4x576x768xf32) <- (4x576x768xf32, 4x576x768xf32) + add_59 = paddle._C_ops.add(add_56, multiply_14) + + # pd_op.layer_norm: (4x576x768xf32, 4x576xf32, 4x576xf32) <- (4x576x768xf32, 768xf32, 768xf32) + layer_norm_60, layer_norm_61, layer_norm_62 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_59, parameter_192, parameter_191, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_191, parameter_192 + + # pd_op.matmul: (4x576x3072xf32) <- (4x576x768xf32, 768x3072xf32) + matmul_45 = paddle._C_ops.matmul(layer_norm_60, parameter_190, False, False) + del parameter_190 + + # pd_op.add: (4x576x3072xf32) <- (4x576x3072xf32, 3072xf32) + add_60 = paddle._C_ops.add(matmul_45, parameter_189) + del parameter_189 + + # pd_op.gelu: (4x576x3072xf32) <- (4x576x3072xf32) + gelu_8 = paddle._C_ops.gelu(add_60, False) + + # pd_op.matmul: (4x576x768xf32) <- (4x576x3072xf32, 3072x768xf32) + matmul_46 = paddle._C_ops.matmul(gelu_8, parameter_188, False, False) + del parameter_188 + + # pd_op.add: (4x576x768xf32) <- (4x576x768xf32, 768xf32) + add_61 = paddle._C_ops.add(matmul_46, parameter_187) + del parameter_187 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_15 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_190 = paddle._C_ops.add(full_9, uniform_15) + del uniform_15 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_15 = paddle._C_ops.floor(add_190) + del add_190 + + # pd_op.divide: (4x576x768xf32) <- (4x576x768xf32, xf32) + divide_15 = paddle._C_ops.divide(add_61, full_9) + + # pd_op.multiply: (4x576x768xf32) <- (4x576x768xf32, 4x1x1xf32) + multiply_15 = paddle._C_ops.multiply(divide_15, floor_15) + + # pd_op.add: (4x576x768xf32) <- (4x576x768xf32, 4x576x768xf32) + add_62 = paddle._C_ops.add(add_59, multiply_15) + + # pd_op.layer_norm: (4x576x768xf32, 4x576xf32, 4x576xf32) <- (4x576x768xf32, 768xf32, 768xf32) + layer_norm_63, layer_norm_64, layer_norm_65 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_62, parameter_186, parameter_185, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_185, parameter_186 + + # pd_op.reshape: (4x24x24x768xf32) <- (4x576x768xf32, 4xi64) + reshape_81 = paddle._C_ops.reshape(layer_norm_63, full_int_array_55) + + # pd_op.roll: (4x24x24x768xf32) <- (4x24x24x768xf32, 2xi64) + roll_8 = paddle._C_ops.roll(reshape_81, full_int_array_4, [1, 2]) + + # pd_op.reshape: (4x2x12x2x12x768xf32) <- (4x24x24x768xf32, 6xi64) + reshape_265 = paddle._C_ops.reshape(roll_8, full_int_array_56) + + # pd_op.transpose: (4x2x2x12x12x768xf32) <- (4x2x12x2x12x768xf32) + transpose_55 = paddle._C_ops.transpose(reshape_265, [0, 1, 3, 2, 4, 5]) + del reshape_265 + + # pd_op.reshape: (16x12x12x768xf32) <- (4x2x2x12x12x768xf32, 4xi64) + reshape_82 = paddle._C_ops.reshape(transpose_55, full_int_array_57) + + # pd_op.reshape: (16x144x768xf32) <- (16x12x12x768xf32, 3xi64) + reshape_83 = paddle._C_ops.reshape(reshape_82, full_int_array_58) + + # pd_op.full: (1x24x24x1xf32) <- () + full_38 = paddle._C_ops.full( + [1, 24, 24, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__44 = paddle._C_ops.set_value_( + full_38, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_38 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__45 = paddle._C_ops.set_value_( + set_value__44, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__44 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__46 = paddle._C_ops.set_value_( + set_value__45, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__45 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__47 = paddle._C_ops.set_value_( + set_value__46, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__46 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__48 = paddle._C_ops.set_value_( + set_value__47, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__47 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__49 = paddle._C_ops.set_value_( + set_value__48, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__48 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__50 = paddle._C_ops.set_value_( + set_value__49, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__49 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__51 = paddle._C_ops.set_value_( + set_value__50, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__50 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__4 = paddle._C_ops.set_value_( + set_value__51, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__51 + + # pd_op.reshape: (1x2x12x2x12x1xf32) <- (1x24x24x1xf32, 6xi64) + reshape_266 = paddle._C_ops.reshape(set_value__4, full_int_array_64) + + # pd_op.transpose: (1x2x2x12x12x1xf32) <- (1x2x12x2x12x1xf32) + transpose_150 = paddle._C_ops.transpose(reshape_266, [0, 1, 3, 2, 4, 5]) + del reshape_266 + + # pd_op.reshape: (4x12x12x1xf32) <- (1x2x2x12x12x1xf32, 4xi64) + reshape_267 = paddle._C_ops.reshape(transpose_150, full_int_array_36) + del transpose_150 + + # pd_op.reshape: (4x144xf32) <- (4x12x12x1xf32, 2xi64) + reshape_268 = paddle._C_ops.reshape(reshape_267, full_int_array_37) + del reshape_267 + + # pd_op.unsqueeze: (4x1x144xf32) <- (4x144xf32, 1xi64) + unsqueeze_49 = paddle._C_ops.unsqueeze(reshape_268, full_int_array_1) + + # pd_op.unsqueeze: (4x144x1xf32) <- (4x144xf32, 1xi64) + unsqueeze_50 = paddle._C_ops.unsqueeze(reshape_268, full_int_array_2) + del reshape_268 + + # pd_op.subtract: (4x144x144xf32) <- (4x1x144xf32, 4x144x1xf32) + subtract_4 = paddle._C_ops.subtract(unsqueeze_49, unsqueeze_50) + del unsqueeze_49, unsqueeze_50 + + # pd_op.not_equal: (4x144x144xb) <- (4x144x144xf32, xf32) + not_equal_4 = paddle._C_ops.not_equal(subtract_4, full_26) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_8 = paddle._C_ops.where(not_equal_4, full_35, subtract_4) + del not_equal_4, subtract_4 + + # pd_op.equal: (4x144x144xb) <- (4x144x144xf32, xf32) + equal_4 = paddle._C_ops.equal(where_8, full_26) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_9 = paddle._C_ops.where(equal_4, full_36, where_8) + del equal_4, where_8 + + # pd_op.matmul: (16x144x2304xf32) <- (16x144x768xf32, 768x2304xf32) + matmul_47 = paddle._C_ops.matmul(reshape_83, parameter_184, False, False) + del parameter_184 + + # pd_op.add: (16x144x2304xf32) <- (16x144x2304xf32, 2304xf32) + add_63 = paddle._C_ops.add(matmul_47, parameter_183) + del parameter_183 + + # pd_op.reshape: (16x144x3x24x32xf32) <- (16x144x2304xf32, 5xi64) + reshape_269 = paddle._C_ops.reshape(add_63, full_int_array_59) + + # pd_op.transpose: (3x16x24x144x32xf32) <- (16x144x3x24x32xf32) + transpose_56 = paddle._C_ops.transpose(reshape_269, [2, 0, 3, 1, 4]) + del reshape_269 + + # pd_op.slice: (16x24x144x32xf32) <- (3x16x24x144x32xf32, 1xi64, 1xi64) + slice_42 = paddle._C_ops.slice( + transpose_56, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (16x24x144x32xf32) <- (3x16x24x144x32xf32, 1xi64, 1xi64) + slice_43 = paddle._C_ops.slice( + transpose_56, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (16x24x144x32xf32) <- (3x16x24x144x32xf32, 1xi64, 1xi64) + slice_9 = paddle._C_ops.slice( + transpose_56, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (16x24x144x32xf32) <- (16x24x144x32xf32, 1xf32) + scale_9 = paddle._C_ops.scale(slice_42, full_0, float("0"), True) + del slice_42 + + # pd_op.transpose: (16x24x32x144xf32) <- (16x24x144x32xf32) + transpose_57 = paddle._C_ops.transpose(slice_43, [0, 1, 3, 2]) + del slice_43 + + # pd_op.matmul: (16x24x144x144xf32) <- (16x24x144x32xf32, 16x24x32x144xf32) + matmul_48 = paddle._C_ops.matmul(scale_9, transpose_57, False, False) + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_84 = paddle._C_ops.reshape(data_37, full_int_array_15) + del data_37 + + # pd_op.index_select: (20736x24xf32) <- (529x24xf32, 20736xi64) + index_select_9 = paddle._C_ops.index_select(data_38, reshape_84, 0) + del data_38 + + # pd_op.reshape: (144x144x24xf32) <- (20736x24xf32, 3xi64) + reshape_270 = paddle._C_ops.reshape(index_select_9, full_int_array_16) + + # pd_op.transpose: (24x144x144xf32) <- (144x144x24xf32) + transpose_58 = paddle._C_ops.transpose(reshape_270, [2, 0, 1]) + del reshape_270 + + # pd_op.unsqueeze: (1x24x144x144xf32) <- (24x144x144xf32, 1xi64) + unsqueeze_13 = paddle._C_ops.unsqueeze(transpose_58, full_int_array_0) + + # pd_op.add: (16x24x144x144xf32) <- (16x24x144x144xf32, 1x24x144x144xf32) + add_64 = paddle._C_ops.add(matmul_48, unsqueeze_13) + + # pd_op.reshape: (4x4x24x144x144xf32) <- (16x24x144x144xf32, 5xi64) + reshape_85 = paddle._C_ops.reshape(add_64, full_int_array_65) + + # pd_op.unsqueeze: (4x1x144x144xf32) <- (4x144x144xf32, 1xi64) + unsqueeze_51 = paddle._C_ops.unsqueeze(where_9, full_int_array_1) + del where_9 + + # pd_op.unsqueeze: (1x4x1x144x144xf32) <- (4x1x144x144xf32, 1xi64) + unsqueeze_14 = paddle._C_ops.unsqueeze(unsqueeze_51, full_int_array_0) + del unsqueeze_51 + + # pd_op.add: (4x4x24x144x144xf32) <- (4x4x24x144x144xf32, 1x4x1x144x144xf32) + add_65 = paddle._C_ops.add(reshape_85, unsqueeze_14) + + # pd_op.reshape: (16x24x144x144xf32) <- (4x4x24x144x144xf32, 4xi64) + reshape_271 = paddle._C_ops.reshape(add_65, full_int_array_66) + + # pd_op.softmax: (16x24x144x144xf32) <- (16x24x144x144xf32) + softmax_9 = paddle._C_ops.softmax(reshape_271, -1) + del reshape_271 + + # pd_op.matmul: (16x24x144x32xf32) <- (16x24x144x144xf32, 16x24x144x32xf32) + matmul_133 = paddle._C_ops.matmul(softmax_9, slice_9, False, False) + + # pd_op.transpose: (16x144x24x32xf32) <- (16x24x144x32xf32) + transpose_59 = paddle._C_ops.transpose(matmul_133, [0, 2, 1, 3]) + del matmul_133 + + # pd_op.reshape: (16x144x768xf32) <- (16x144x24x32xf32, 3xi64) + reshape_86 = paddle._C_ops.reshape(transpose_59, full_int_array_60) + + # pd_op.matmul: (16x144x768xf32) <- (16x144x768xf32, 768x768xf32) + matmul_49 = paddle._C_ops.matmul(reshape_86, parameter_182, False, False) + del parameter_182 + + # pd_op.add: (16x144x768xf32) <- (16x144x768xf32, 768xf32) + add_66 = paddle._C_ops.add(matmul_49, parameter_181) + del parameter_181 + + # pd_op.reshape: (16x12x12x768xf32) <- (16x144x768xf32, 4xi64) + reshape_87 = paddle._C_ops.reshape(add_66, full_int_array_57) + + # pd_op.reshape: (4x2x2x12x12x768xf32) <- (16x12x12x768xf32, 6xi64) + reshape_272 = paddle._C_ops.reshape(reshape_87, full_int_array_61) + + # pd_op.transpose: (4x2x12x2x12x768xf32) <- (4x2x2x12x12x768xf32) + transpose_60 = paddle._C_ops.transpose(reshape_272, [0, 1, 3, 2, 4, 5]) + del reshape_272 + + # pd_op.reshape: (4x24x24x768xf32) <- (4x2x12x2x12x768xf32, 4xi64) + reshape_88 = paddle._C_ops.reshape(transpose_60, full_int_array_62) + + # pd_op.roll: (4x24x24x768xf32) <- (4x24x24x768xf32, 2xi64) + roll_9 = paddle._C_ops.roll(reshape_88, full_int_array_5, [1, 2]) + + # pd_op.reshape: (4x576x768xf32) <- (4x24x24x768xf32, 3xi64) + reshape_89 = paddle._C_ops.reshape(roll_9, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_10 = paddle._C_ops.full( + [], + float("0.96087"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_113 = full_10 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_16 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_191 = paddle._C_ops.add(full_10, uniform_16) + del uniform_16 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_16 = paddle._C_ops.floor(add_191) + del add_191 + + # pd_op.divide: (4x576x768xf32) <- (4x576x768xf32, xf32) + divide_16 = paddle._C_ops.divide(reshape_89, full_10) + + # pd_op.multiply: (4x576x768xf32) <- (4x576x768xf32, 4x1x1xf32) + multiply_16 = paddle._C_ops.multiply(divide_16, floor_16) + + # pd_op.add: (4x576x768xf32) <- (4x576x768xf32, 4x576x768xf32) + add_67 = paddle._C_ops.add(add_62, multiply_16) + + # pd_op.layer_norm: (4x576x768xf32, 4x576xf32, 4x576xf32) <- (4x576x768xf32, 768xf32, 768xf32) + layer_norm_66, layer_norm_67, layer_norm_68 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_67, parameter_180, parameter_179, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_179, parameter_180 + + # pd_op.matmul: (4x576x3072xf32) <- (4x576x768xf32, 768x3072xf32) + matmul_50 = paddle._C_ops.matmul(layer_norm_66, parameter_178, False, False) + del parameter_178 + + # pd_op.add: (4x576x3072xf32) <- (4x576x3072xf32, 3072xf32) + add_68 = paddle._C_ops.add(matmul_50, parameter_177) + del parameter_177 + + # pd_op.gelu: (4x576x3072xf32) <- (4x576x3072xf32) + gelu_9 = paddle._C_ops.gelu(add_68, False) + + # pd_op.matmul: (4x576x768xf32) <- (4x576x3072xf32, 3072x768xf32) + matmul_51 = paddle._C_ops.matmul(gelu_9, parameter_176, False, False) + del parameter_176 + + # pd_op.add: (4x576x768xf32) <- (4x576x768xf32, 768xf32) + add_69 = paddle._C_ops.add(matmul_51, parameter_175) + del parameter_175 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_17 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_192 = paddle._C_ops.add(full_10, uniform_17) + del uniform_17 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_17 = paddle._C_ops.floor(add_192) + del add_192 + + # pd_op.divide: (4x576x768xf32) <- (4x576x768xf32, xf32) + divide_17 = paddle._C_ops.divide(add_69, full_10) + + # pd_op.multiply: (4x576x768xf32) <- (4x576x768xf32, 4x1x1xf32) + multiply_17 = paddle._C_ops.multiply(divide_17, floor_17) + + # pd_op.add: (4x576x768xf32) <- (4x576x768xf32, 4x576x768xf32) + add_70 = paddle._C_ops.add(add_67, multiply_17) + + # pd_op.layer_norm: (4x576x768xf32, 4x576xf32, 4x576xf32) <- (4x576x768xf32, 768xf32, 768xf32) + layer_norm_69, layer_norm_70, layer_norm_71 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_70, parameter_174, parameter_173, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_173, parameter_174 + + # pd_op.reshape: (4x24x24x768xf32) <- (4x576x768xf32, 4xi64) + reshape_90 = paddle._C_ops.reshape(layer_norm_69, full_int_array_55) + + # pd_op.reshape: (4x2x12x2x12x768xf32) <- (4x24x24x768xf32, 6xi64) + reshape_273 = paddle._C_ops.reshape(reshape_90, full_int_array_56) + + # pd_op.transpose: (4x2x2x12x12x768xf32) <- (4x2x12x2x12x768xf32) + transpose_61 = paddle._C_ops.transpose(reshape_273, [0, 1, 3, 2, 4, 5]) + del reshape_273 + + # pd_op.reshape: (16x12x12x768xf32) <- (4x2x2x12x12x768xf32, 4xi64) + reshape_91 = paddle._C_ops.reshape(transpose_61, full_int_array_57) + + # pd_op.reshape: (16x144x768xf32) <- (16x12x12x768xf32, 3xi64) + reshape_92 = paddle._C_ops.reshape(reshape_91, full_int_array_58) + + # pd_op.matmul: (16x144x2304xf32) <- (16x144x768xf32, 768x2304xf32) + matmul_52 = paddle._C_ops.matmul(reshape_92, parameter_172, False, False) + del parameter_172 + + # pd_op.add: (16x144x2304xf32) <- (16x144x2304xf32, 2304xf32) + add_71 = paddle._C_ops.add(matmul_52, parameter_171) + del parameter_171 + + # pd_op.reshape: (16x144x3x24x32xf32) <- (16x144x2304xf32, 5xi64) + reshape_274 = paddle._C_ops.reshape(add_71, full_int_array_59) + + # pd_op.transpose: (3x16x24x144x32xf32) <- (16x144x3x24x32xf32) + transpose_62 = paddle._C_ops.transpose(reshape_274, [2, 0, 3, 1, 4]) + del reshape_274 + + # pd_op.slice: (16x24x144x32xf32) <- (3x16x24x144x32xf32, 1xi64, 1xi64) + slice_44 = paddle._C_ops.slice( + transpose_62, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (16x24x144x32xf32) <- (3x16x24x144x32xf32, 1xi64, 1xi64) + slice_45 = paddle._C_ops.slice( + transpose_62, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (16x24x144x32xf32) <- (3x16x24x144x32xf32, 1xi64, 1xi64) + slice_10 = paddle._C_ops.slice( + transpose_62, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (16x24x144x32xf32) <- (16x24x144x32xf32, 1xf32) + scale_10 = paddle._C_ops.scale(slice_44, full_0, float("0"), True) + del slice_44 + + # pd_op.transpose: (16x24x32x144xf32) <- (16x24x144x32xf32) + transpose_63 = paddle._C_ops.transpose(slice_45, [0, 1, 3, 2]) + del slice_45 + + # pd_op.matmul: (16x24x144x144xf32) <- (16x24x144x32xf32, 16x24x32x144xf32) + matmul_53 = paddle._C_ops.matmul(scale_10, transpose_63, False, False) + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_93 = paddle._C_ops.reshape(data_39, full_int_array_15) + del data_39 + + # pd_op.index_select: (20736x24xf32) <- (529x24xf32, 20736xi64) + index_select_10 = paddle._C_ops.index_select(data_40, reshape_93, 0) + del data_40 + + # pd_op.reshape: (144x144x24xf32) <- (20736x24xf32, 3xi64) + reshape_275 = paddle._C_ops.reshape(index_select_10, full_int_array_16) + + # pd_op.transpose: (24x144x144xf32) <- (144x144x24xf32) + transpose_64 = paddle._C_ops.transpose(reshape_275, [2, 0, 1]) + del reshape_275 + + # pd_op.unsqueeze: (1x24x144x144xf32) <- (24x144x144xf32, 1xi64) + unsqueeze_15 = paddle._C_ops.unsqueeze(transpose_64, full_int_array_0) + + # pd_op.add: (16x24x144x144xf32) <- (16x24x144x144xf32, 1x24x144x144xf32) + add_193 = paddle._C_ops.add(matmul_53, unsqueeze_15) + + # pd_op.softmax: (16x24x144x144xf32) <- (16x24x144x144xf32) + softmax_10 = paddle._C_ops.softmax(add_193, -1) + del add_193 + + # pd_op.matmul: (16x24x144x32xf32) <- (16x24x144x144xf32, 16x24x144x32xf32) + matmul_134 = paddle._C_ops.matmul(softmax_10, slice_10, False, False) + + # pd_op.transpose: (16x144x24x32xf32) <- (16x24x144x32xf32) + transpose_65 = paddle._C_ops.transpose(matmul_134, [0, 2, 1, 3]) + del matmul_134 + + # pd_op.reshape: (16x144x768xf32) <- (16x144x24x32xf32, 3xi64) + reshape_94 = paddle._C_ops.reshape(transpose_65, full_int_array_60) + + # pd_op.matmul: (16x144x768xf32) <- (16x144x768xf32, 768x768xf32) + matmul_54 = paddle._C_ops.matmul(reshape_94, parameter_170, False, False) + del parameter_170 + + # pd_op.add: (16x144x768xf32) <- (16x144x768xf32, 768xf32) + add_72 = paddle._C_ops.add(matmul_54, parameter_169) + del parameter_169 + + # pd_op.reshape: (16x12x12x768xf32) <- (16x144x768xf32, 4xi64) + reshape_95 = paddle._C_ops.reshape(add_72, full_int_array_57) + + # pd_op.reshape: (4x2x2x12x12x768xf32) <- (16x12x12x768xf32, 6xi64) + reshape_276 = paddle._C_ops.reshape(reshape_95, full_int_array_61) + + # pd_op.transpose: (4x2x12x2x12x768xf32) <- (4x2x2x12x12x768xf32) + transpose_66 = paddle._C_ops.transpose(reshape_276, [0, 1, 3, 2, 4, 5]) + del reshape_276 + + # pd_op.reshape: (4x24x24x768xf32) <- (4x2x12x2x12x768xf32, 4xi64) + reshape_96 = paddle._C_ops.reshape(transpose_66, full_int_array_62) + + # pd_op.reshape: (4x576x768xf32) <- (4x24x24x768xf32, 3xi64) + reshape_97 = paddle._C_ops.reshape(reshape_96, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_11 = paddle._C_ops.full( + [], + float("0.956522"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_122 = full_11 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_18 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_194 = paddle._C_ops.add(full_11, uniform_18) + del uniform_18 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_18 = paddle._C_ops.floor(add_194) + del add_194 + + # pd_op.divide: (4x576x768xf32) <- (4x576x768xf32, xf32) + divide_18 = paddle._C_ops.divide(reshape_97, full_11) + + # pd_op.multiply: (4x576x768xf32) <- (4x576x768xf32, 4x1x1xf32) + multiply_18 = paddle._C_ops.multiply(divide_18, floor_18) + + # pd_op.add: (4x576x768xf32) <- (4x576x768xf32, 4x576x768xf32) + add_73 = paddle._C_ops.add(add_70, multiply_18) + + # pd_op.layer_norm: (4x576x768xf32, 4x576xf32, 4x576xf32) <- (4x576x768xf32, 768xf32, 768xf32) + layer_norm_72, layer_norm_73, layer_norm_74 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_73, parameter_168, parameter_167, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_167, parameter_168 + + # pd_op.matmul: (4x576x3072xf32) <- (4x576x768xf32, 768x3072xf32) + matmul_55 = paddle._C_ops.matmul(layer_norm_72, parameter_166, False, False) + del parameter_166 + + # pd_op.add: (4x576x3072xf32) <- (4x576x3072xf32, 3072xf32) + add_74 = paddle._C_ops.add(matmul_55, parameter_165) + del parameter_165 + + # pd_op.gelu: (4x576x3072xf32) <- (4x576x3072xf32) + gelu_10 = paddle._C_ops.gelu(add_74, False) + + # pd_op.matmul: (4x576x768xf32) <- (4x576x3072xf32, 3072x768xf32) + matmul_56 = paddle._C_ops.matmul(gelu_10, parameter_164, False, False) + del parameter_164 + + # pd_op.add: (4x576x768xf32) <- (4x576x768xf32, 768xf32) + add_75 = paddle._C_ops.add(matmul_56, parameter_163) + del parameter_163 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_19 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_195 = paddle._C_ops.add(full_11, uniform_19) + del uniform_19 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_19 = paddle._C_ops.floor(add_195) + del add_195 + + # pd_op.divide: (4x576x768xf32) <- (4x576x768xf32, xf32) + divide_19 = paddle._C_ops.divide(add_75, full_11) + + # pd_op.multiply: (4x576x768xf32) <- (4x576x768xf32, 4x1x1xf32) + multiply_19 = paddle._C_ops.multiply(divide_19, floor_19) + + # pd_op.add: (4x576x768xf32) <- (4x576x768xf32, 4x576x768xf32) + add_76 = paddle._C_ops.add(add_73, multiply_19) + + # pd_op.layer_norm: (4x576x768xf32, 4x576xf32, 4x576xf32) <- (4x576x768xf32, 768xf32, 768xf32) + layer_norm_75, layer_norm_76, layer_norm_77 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_76, parameter_162, parameter_161, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_161, parameter_162 + + # pd_op.reshape: (4x24x24x768xf32) <- (4x576x768xf32, 4xi64) + reshape_98 = paddle._C_ops.reshape(layer_norm_75, full_int_array_55) + + # pd_op.roll: (4x24x24x768xf32) <- (4x24x24x768xf32, 2xi64) + roll_10 = paddle._C_ops.roll(reshape_98, full_int_array_4, [1, 2]) + + # pd_op.reshape: (4x2x12x2x12x768xf32) <- (4x24x24x768xf32, 6xi64) + reshape_277 = paddle._C_ops.reshape(roll_10, full_int_array_56) + + # pd_op.transpose: (4x2x2x12x12x768xf32) <- (4x2x12x2x12x768xf32) + transpose_67 = paddle._C_ops.transpose(reshape_277, [0, 1, 3, 2, 4, 5]) + del reshape_277 + + # pd_op.reshape: (16x12x12x768xf32) <- (4x2x2x12x12x768xf32, 4xi64) + reshape_99 = paddle._C_ops.reshape(transpose_67, full_int_array_57) + + # pd_op.reshape: (16x144x768xf32) <- (16x12x12x768xf32, 3xi64) + reshape_100 = paddle._C_ops.reshape(reshape_99, full_int_array_58) + + # pd_op.full: (1x24x24x1xf32) <- () + full_39 = paddle._C_ops.full( + [1, 24, 24, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__52 = paddle._C_ops.set_value_( + full_39, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_39 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__53 = paddle._C_ops.set_value_( + set_value__52, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__52 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__54 = paddle._C_ops.set_value_( + set_value__53, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__53 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__55 = paddle._C_ops.set_value_( + set_value__54, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__54 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__56 = paddle._C_ops.set_value_( + set_value__55, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__55 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__57 = paddle._C_ops.set_value_( + set_value__56, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__56 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__58 = paddle._C_ops.set_value_( + set_value__57, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__57 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__59 = paddle._C_ops.set_value_( + set_value__58, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__58 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__5 = paddle._C_ops.set_value_( + set_value__59, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__59 + + # pd_op.reshape: (1x2x12x2x12x1xf32) <- (1x24x24x1xf32, 6xi64) + reshape_278 = paddle._C_ops.reshape(set_value__5, full_int_array_64) + + # pd_op.transpose: (1x2x2x12x12x1xf32) <- (1x2x12x2x12x1xf32) + transpose_151 = paddle._C_ops.transpose(reshape_278, [0, 1, 3, 2, 4, 5]) + del reshape_278 + + # pd_op.reshape: (4x12x12x1xf32) <- (1x2x2x12x12x1xf32, 4xi64) + reshape_279 = paddle._C_ops.reshape(transpose_151, full_int_array_36) + del transpose_151 + + # pd_op.reshape: (4x144xf32) <- (4x12x12x1xf32, 2xi64) + reshape_280 = paddle._C_ops.reshape(reshape_279, full_int_array_37) + del reshape_279 + + # pd_op.unsqueeze: (4x1x144xf32) <- (4x144xf32, 1xi64) + unsqueeze_52 = paddle._C_ops.unsqueeze(reshape_280, full_int_array_1) + + # pd_op.unsqueeze: (4x144x1xf32) <- (4x144xf32, 1xi64) + unsqueeze_53 = paddle._C_ops.unsqueeze(reshape_280, full_int_array_2) + del reshape_280 + + # pd_op.subtract: (4x144x144xf32) <- (4x1x144xf32, 4x144x1xf32) + subtract_5 = paddle._C_ops.subtract(unsqueeze_52, unsqueeze_53) + del unsqueeze_52, unsqueeze_53 + + # pd_op.not_equal: (4x144x144xb) <- (4x144x144xf32, xf32) + not_equal_5 = paddle._C_ops.not_equal(subtract_5, full_26) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_10 = paddle._C_ops.where(not_equal_5, full_35, subtract_5) + del not_equal_5, subtract_5 + + # pd_op.equal: (4x144x144xb) <- (4x144x144xf32, xf32) + equal_5 = paddle._C_ops.equal(where_10, full_26) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_11 = paddle._C_ops.where(equal_5, full_36, where_10) + del equal_5, where_10 + + # pd_op.matmul: (16x144x2304xf32) <- (16x144x768xf32, 768x2304xf32) + matmul_57 = paddle._C_ops.matmul(reshape_100, parameter_160, False, False) + del parameter_160 + + # pd_op.add: (16x144x2304xf32) <- (16x144x2304xf32, 2304xf32) + add_77 = paddle._C_ops.add(matmul_57, parameter_159) + del parameter_159 + + # pd_op.reshape: (16x144x3x24x32xf32) <- (16x144x2304xf32, 5xi64) + reshape_281 = paddle._C_ops.reshape(add_77, full_int_array_59) + + # pd_op.transpose: (3x16x24x144x32xf32) <- (16x144x3x24x32xf32) + transpose_68 = paddle._C_ops.transpose(reshape_281, [2, 0, 3, 1, 4]) + del reshape_281 + + # pd_op.slice: (16x24x144x32xf32) <- (3x16x24x144x32xf32, 1xi64, 1xi64) + slice_46 = paddle._C_ops.slice( + transpose_68, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (16x24x144x32xf32) <- (3x16x24x144x32xf32, 1xi64, 1xi64) + slice_47 = paddle._C_ops.slice( + transpose_68, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (16x24x144x32xf32) <- (3x16x24x144x32xf32, 1xi64, 1xi64) + slice_11 = paddle._C_ops.slice( + transpose_68, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (16x24x144x32xf32) <- (16x24x144x32xf32, 1xf32) + scale_11 = paddle._C_ops.scale(slice_46, full_0, float("0"), True) + del slice_46 + + # pd_op.transpose: (16x24x32x144xf32) <- (16x24x144x32xf32) + transpose_69 = paddle._C_ops.transpose(slice_47, [0, 1, 3, 2]) + del slice_47 + + # pd_op.matmul: (16x24x144x144xf32) <- (16x24x144x32xf32, 16x24x32x144xf32) + matmul_58 = paddle._C_ops.matmul(scale_11, transpose_69, False, False) + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_101 = paddle._C_ops.reshape(data_41, full_int_array_15) + del data_41 + + # pd_op.index_select: (20736x24xf32) <- (529x24xf32, 20736xi64) + index_select_11 = paddle._C_ops.index_select(data_42, reshape_101, 0) + del data_42 + + # pd_op.reshape: (144x144x24xf32) <- (20736x24xf32, 3xi64) + reshape_282 = paddle._C_ops.reshape(index_select_11, full_int_array_16) + + # pd_op.transpose: (24x144x144xf32) <- (144x144x24xf32) + transpose_70 = paddle._C_ops.transpose(reshape_282, [2, 0, 1]) + del reshape_282 + + # pd_op.unsqueeze: (1x24x144x144xf32) <- (24x144x144xf32, 1xi64) + unsqueeze_16 = paddle._C_ops.unsqueeze(transpose_70, full_int_array_0) + + # pd_op.add: (16x24x144x144xf32) <- (16x24x144x144xf32, 1x24x144x144xf32) + add_78 = paddle._C_ops.add(matmul_58, unsqueeze_16) + + # pd_op.reshape: (4x4x24x144x144xf32) <- (16x24x144x144xf32, 5xi64) + reshape_102 = paddle._C_ops.reshape(add_78, full_int_array_65) + + # pd_op.unsqueeze: (4x1x144x144xf32) <- (4x144x144xf32, 1xi64) + unsqueeze_54 = paddle._C_ops.unsqueeze(where_11, full_int_array_1) + del where_11 + + # pd_op.unsqueeze: (1x4x1x144x144xf32) <- (4x1x144x144xf32, 1xi64) + unsqueeze_17 = paddle._C_ops.unsqueeze(unsqueeze_54, full_int_array_0) + del unsqueeze_54 + + # pd_op.add: (4x4x24x144x144xf32) <- (4x4x24x144x144xf32, 1x4x1x144x144xf32) + add_79 = paddle._C_ops.add(reshape_102, unsqueeze_17) + + # pd_op.reshape: (16x24x144x144xf32) <- (4x4x24x144x144xf32, 4xi64) + reshape_283 = paddle._C_ops.reshape(add_79, full_int_array_66) + + # pd_op.softmax: (16x24x144x144xf32) <- (16x24x144x144xf32) + softmax_11 = paddle._C_ops.softmax(reshape_283, -1) + del reshape_283 + + # pd_op.matmul: (16x24x144x32xf32) <- (16x24x144x144xf32, 16x24x144x32xf32) + matmul_135 = paddle._C_ops.matmul(softmax_11, slice_11, False, False) + + # pd_op.transpose: (16x144x24x32xf32) <- (16x24x144x32xf32) + transpose_71 = paddle._C_ops.transpose(matmul_135, [0, 2, 1, 3]) + del matmul_135 + + # pd_op.reshape: (16x144x768xf32) <- (16x144x24x32xf32, 3xi64) + reshape_103 = paddle._C_ops.reshape(transpose_71, full_int_array_60) + + # pd_op.matmul: (16x144x768xf32) <- (16x144x768xf32, 768x768xf32) + matmul_59 = paddle._C_ops.matmul(reshape_103, parameter_158, False, False) + del parameter_158 + + # pd_op.add: (16x144x768xf32) <- (16x144x768xf32, 768xf32) + add_80 = paddle._C_ops.add(matmul_59, parameter_157) + del parameter_157 + + # pd_op.reshape: (16x12x12x768xf32) <- (16x144x768xf32, 4xi64) + reshape_104 = paddle._C_ops.reshape(add_80, full_int_array_57) + + # pd_op.reshape: (4x2x2x12x12x768xf32) <- (16x12x12x768xf32, 6xi64) + reshape_284 = paddle._C_ops.reshape(reshape_104, full_int_array_61) + + # pd_op.transpose: (4x2x12x2x12x768xf32) <- (4x2x2x12x12x768xf32) + transpose_72 = paddle._C_ops.transpose(reshape_284, [0, 1, 3, 2, 4, 5]) + del reshape_284 + + # pd_op.reshape: (4x24x24x768xf32) <- (4x2x12x2x12x768xf32, 4xi64) + reshape_105 = paddle._C_ops.reshape(transpose_72, full_int_array_62) + + # pd_op.roll: (4x24x24x768xf32) <- (4x24x24x768xf32, 2xi64) + roll_11 = paddle._C_ops.roll(reshape_105, full_int_array_5, [1, 2]) + + # pd_op.reshape: (4x576x768xf32) <- (4x24x24x768xf32, 3xi64) + reshape_106 = paddle._C_ops.reshape(roll_11, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_12 = paddle._C_ops.full( + [], + float("0.952174"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_133 = full_12 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_20 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_196 = paddle._C_ops.add(full_12, uniform_20) + del uniform_20 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_20 = paddle._C_ops.floor(add_196) + del add_196 + + # pd_op.divide: (4x576x768xf32) <- (4x576x768xf32, xf32) + divide_20 = paddle._C_ops.divide(reshape_106, full_12) + + # pd_op.multiply: (4x576x768xf32) <- (4x576x768xf32, 4x1x1xf32) + multiply_20 = paddle._C_ops.multiply(divide_20, floor_20) + + # pd_op.add: (4x576x768xf32) <- (4x576x768xf32, 4x576x768xf32) + add_81 = paddle._C_ops.add(add_76, multiply_20) + + # pd_op.layer_norm: (4x576x768xf32, 4x576xf32, 4x576xf32) <- (4x576x768xf32, 768xf32, 768xf32) + layer_norm_78, layer_norm_79, layer_norm_80 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_81, parameter_156, parameter_155, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_155, parameter_156 + + # pd_op.matmul: (4x576x3072xf32) <- (4x576x768xf32, 768x3072xf32) + matmul_60 = paddle._C_ops.matmul(layer_norm_78, parameter_154, False, False) + del parameter_154 + + # pd_op.add: (4x576x3072xf32) <- (4x576x3072xf32, 3072xf32) + add_82 = paddle._C_ops.add(matmul_60, parameter_153) + del parameter_153 + + # pd_op.gelu: (4x576x3072xf32) <- (4x576x3072xf32) + gelu_11 = paddle._C_ops.gelu(add_82, False) + + # pd_op.matmul: (4x576x768xf32) <- (4x576x3072xf32, 3072x768xf32) + matmul_61 = paddle._C_ops.matmul(gelu_11, parameter_152, False, False) + del parameter_152 + + # pd_op.add: (4x576x768xf32) <- (4x576x768xf32, 768xf32) + add_83 = paddle._C_ops.add(matmul_61, parameter_151) + del parameter_151 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_21 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_197 = paddle._C_ops.add(full_12, uniform_21) + del uniform_21 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_21 = paddle._C_ops.floor(add_197) + del add_197 + + # pd_op.divide: (4x576x768xf32) <- (4x576x768xf32, xf32) + divide_21 = paddle._C_ops.divide(add_83, full_12) + + # pd_op.multiply: (4x576x768xf32) <- (4x576x768xf32, 4x1x1xf32) + multiply_21 = paddle._C_ops.multiply(divide_21, floor_21) + + # pd_op.add: (4x576x768xf32) <- (4x576x768xf32, 4x576x768xf32) + add_84 = paddle._C_ops.add(add_81, multiply_21) + + # pd_op.layer_norm: (4x576x768xf32, 4x576xf32, 4x576xf32) <- (4x576x768xf32, 768xf32, 768xf32) + layer_norm_81, layer_norm_82, layer_norm_83 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_84, parameter_150, parameter_149, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_149, parameter_150 + + # pd_op.reshape: (4x24x24x768xf32) <- (4x576x768xf32, 4xi64) + reshape_107 = paddle._C_ops.reshape(layer_norm_81, full_int_array_55) + + # pd_op.reshape: (4x2x12x2x12x768xf32) <- (4x24x24x768xf32, 6xi64) + reshape_285 = paddle._C_ops.reshape(reshape_107, full_int_array_56) + + # pd_op.transpose: (4x2x2x12x12x768xf32) <- (4x2x12x2x12x768xf32) + transpose_73 = paddle._C_ops.transpose(reshape_285, [0, 1, 3, 2, 4, 5]) + del reshape_285 + + # pd_op.reshape: (16x12x12x768xf32) <- (4x2x2x12x12x768xf32, 4xi64) + reshape_108 = paddle._C_ops.reshape(transpose_73, full_int_array_57) + + # pd_op.reshape: (16x144x768xf32) <- (16x12x12x768xf32, 3xi64) + reshape_109 = paddle._C_ops.reshape(reshape_108, full_int_array_58) + + # pd_op.matmul: (16x144x2304xf32) <- (16x144x768xf32, 768x2304xf32) + matmul_62 = paddle._C_ops.matmul(reshape_109, parameter_148, False, False) + del parameter_148 + + # pd_op.add: (16x144x2304xf32) <- (16x144x2304xf32, 2304xf32) + add_85 = paddle._C_ops.add(matmul_62, parameter_147) + del parameter_147 + + # pd_op.reshape: (16x144x3x24x32xf32) <- (16x144x2304xf32, 5xi64) + reshape_286 = paddle._C_ops.reshape(add_85, full_int_array_59) + + # pd_op.transpose: (3x16x24x144x32xf32) <- (16x144x3x24x32xf32) + transpose_74 = paddle._C_ops.transpose(reshape_286, [2, 0, 3, 1, 4]) + del reshape_286 + + # pd_op.slice: (16x24x144x32xf32) <- (3x16x24x144x32xf32, 1xi64, 1xi64) + slice_48 = paddle._C_ops.slice( + transpose_74, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (16x24x144x32xf32) <- (3x16x24x144x32xf32, 1xi64, 1xi64) + slice_49 = paddle._C_ops.slice( + transpose_74, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (16x24x144x32xf32) <- (3x16x24x144x32xf32, 1xi64, 1xi64) + slice_12 = paddle._C_ops.slice( + transpose_74, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (16x24x144x32xf32) <- (16x24x144x32xf32, 1xf32) + scale_12 = paddle._C_ops.scale(slice_48, full_0, float("0"), True) + del slice_48 + + # pd_op.transpose: (16x24x32x144xf32) <- (16x24x144x32xf32) + transpose_75 = paddle._C_ops.transpose(slice_49, [0, 1, 3, 2]) + del slice_49 + + # pd_op.matmul: (16x24x144x144xf32) <- (16x24x144x32xf32, 16x24x32x144xf32) + matmul_63 = paddle._C_ops.matmul(scale_12, transpose_75, False, False) + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_110 = paddle._C_ops.reshape(data_44, full_int_array_15) + del data_44 + + # pd_op.index_select: (20736x24xf32) <- (529x24xf32, 20736xi64) + index_select_12 = paddle._C_ops.index_select(data_45, reshape_110, 0) + del data_45 + + # pd_op.reshape: (144x144x24xf32) <- (20736x24xf32, 3xi64) + reshape_287 = paddle._C_ops.reshape(index_select_12, full_int_array_16) + + # pd_op.transpose: (24x144x144xf32) <- (144x144x24xf32) + transpose_76 = paddle._C_ops.transpose(reshape_287, [2, 0, 1]) + del reshape_287 + + # pd_op.unsqueeze: (1x24x144x144xf32) <- (24x144x144xf32, 1xi64) + unsqueeze_18 = paddle._C_ops.unsqueeze(transpose_76, full_int_array_0) + + # pd_op.add: (16x24x144x144xf32) <- (16x24x144x144xf32, 1x24x144x144xf32) + add_198 = paddle._C_ops.add(matmul_63, unsqueeze_18) + + # pd_op.softmax: (16x24x144x144xf32) <- (16x24x144x144xf32) + softmax_12 = paddle._C_ops.softmax(add_198, -1) + del add_198 + + # pd_op.matmul: (16x24x144x32xf32) <- (16x24x144x144xf32, 16x24x144x32xf32) + matmul_136 = paddle._C_ops.matmul(softmax_12, slice_12, False, False) + + # pd_op.transpose: (16x144x24x32xf32) <- (16x24x144x32xf32) + transpose_77 = paddle._C_ops.transpose(matmul_136, [0, 2, 1, 3]) + del matmul_136 + + # pd_op.reshape: (16x144x768xf32) <- (16x144x24x32xf32, 3xi64) + reshape_111 = paddle._C_ops.reshape(transpose_77, full_int_array_60) + + # pd_op.matmul: (16x144x768xf32) <- (16x144x768xf32, 768x768xf32) + matmul_64 = paddle._C_ops.matmul(reshape_111, parameter_146, False, False) + del parameter_146 + + # pd_op.add: (16x144x768xf32) <- (16x144x768xf32, 768xf32) + add_86 = paddle._C_ops.add(matmul_64, parameter_145) + del parameter_145 + + # pd_op.reshape: (16x12x12x768xf32) <- (16x144x768xf32, 4xi64) + reshape_112 = paddle._C_ops.reshape(add_86, full_int_array_57) + + # pd_op.reshape: (4x2x2x12x12x768xf32) <- (16x12x12x768xf32, 6xi64) + reshape_288 = paddle._C_ops.reshape(reshape_112, full_int_array_61) + + # pd_op.transpose: (4x2x12x2x12x768xf32) <- (4x2x2x12x12x768xf32) + transpose_78 = paddle._C_ops.transpose(reshape_288, [0, 1, 3, 2, 4, 5]) + del reshape_288 + + # pd_op.reshape: (4x24x24x768xf32) <- (4x2x12x2x12x768xf32, 4xi64) + reshape_113 = paddle._C_ops.reshape(transpose_78, full_int_array_62) + + # pd_op.reshape: (4x576x768xf32) <- (4x24x24x768xf32, 3xi64) + reshape_114 = paddle._C_ops.reshape(reshape_113, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_13 = paddle._C_ops.full( + [], + float("0.947826"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_142 = full_13 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_22 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_199 = paddle._C_ops.add(full_13, uniform_22) + del uniform_22 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_22 = paddle._C_ops.floor(add_199) + del add_199 + + # pd_op.divide: (4x576x768xf32) <- (4x576x768xf32, xf32) + divide_22 = paddle._C_ops.divide(reshape_114, full_13) + + # pd_op.multiply: (4x576x768xf32) <- (4x576x768xf32, 4x1x1xf32) + multiply_22 = paddle._C_ops.multiply(divide_22, floor_22) + + # pd_op.add: (4x576x768xf32) <- (4x576x768xf32, 4x576x768xf32) + add_87 = paddle._C_ops.add(add_84, multiply_22) + + # pd_op.layer_norm: (4x576x768xf32, 4x576xf32, 4x576xf32) <- (4x576x768xf32, 768xf32, 768xf32) + layer_norm_84, layer_norm_85, layer_norm_86 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_87, parameter_144, parameter_143, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_143, parameter_144 + + # pd_op.matmul: (4x576x3072xf32) <- (4x576x768xf32, 768x3072xf32) + matmul_65 = paddle._C_ops.matmul(layer_norm_84, parameter_142, False, False) + del parameter_142 + + # pd_op.add: (4x576x3072xf32) <- (4x576x3072xf32, 3072xf32) + add_88 = paddle._C_ops.add(matmul_65, parameter_141) + del parameter_141 + + # pd_op.gelu: (4x576x3072xf32) <- (4x576x3072xf32) + gelu_12 = paddle._C_ops.gelu(add_88, False) + + # pd_op.matmul: (4x576x768xf32) <- (4x576x3072xf32, 3072x768xf32) + matmul_66 = paddle._C_ops.matmul(gelu_12, parameter_140, False, False) + del parameter_140 + + # pd_op.add: (4x576x768xf32) <- (4x576x768xf32, 768xf32) + add_89 = paddle._C_ops.add(matmul_66, parameter_139) + del parameter_139 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_23 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_200 = paddle._C_ops.add(full_13, uniform_23) + del uniform_23 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_23 = paddle._C_ops.floor(add_200) + del add_200 + + # pd_op.divide: (4x576x768xf32) <- (4x576x768xf32, xf32) + divide_23 = paddle._C_ops.divide(add_89, full_13) + + # pd_op.multiply: (4x576x768xf32) <- (4x576x768xf32, 4x1x1xf32) + multiply_23 = paddle._C_ops.multiply(divide_23, floor_23) + + # pd_op.add: (4x576x768xf32) <- (4x576x768xf32, 4x576x768xf32) + add_90 = paddle._C_ops.add(add_87, multiply_23) + + # pd_op.layer_norm: (4x576x768xf32, 4x576xf32, 4x576xf32) <- (4x576x768xf32, 768xf32, 768xf32) + layer_norm_87, layer_norm_88, layer_norm_89 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_90, parameter_138, parameter_137, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_137, parameter_138 + + # pd_op.reshape: (4x24x24x768xf32) <- (4x576x768xf32, 4xi64) + reshape_115 = paddle._C_ops.reshape(layer_norm_87, full_int_array_55) + + # pd_op.roll: (4x24x24x768xf32) <- (4x24x24x768xf32, 2xi64) + roll_12 = paddle._C_ops.roll(reshape_115, full_int_array_4, [1, 2]) + + # pd_op.reshape: (4x2x12x2x12x768xf32) <- (4x24x24x768xf32, 6xi64) + reshape_289 = paddle._C_ops.reshape(roll_12, full_int_array_56) + + # pd_op.transpose: (4x2x2x12x12x768xf32) <- (4x2x12x2x12x768xf32) + transpose_79 = paddle._C_ops.transpose(reshape_289, [0, 1, 3, 2, 4, 5]) + del reshape_289 + + # pd_op.reshape: (16x12x12x768xf32) <- (4x2x2x12x12x768xf32, 4xi64) + reshape_116 = paddle._C_ops.reshape(transpose_79, full_int_array_57) + + # pd_op.reshape: (16x144x768xf32) <- (16x12x12x768xf32, 3xi64) + reshape_117 = paddle._C_ops.reshape(reshape_116, full_int_array_58) + + # pd_op.full: (1x24x24x1xf32) <- () + full_40 = paddle._C_ops.full( + [1, 24, 24, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__60 = paddle._C_ops.set_value_( + full_40, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_40 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__61 = paddle._C_ops.set_value_( + set_value__60, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__60 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__62 = paddle._C_ops.set_value_( + set_value__61, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__61 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__63 = paddle._C_ops.set_value_( + set_value__62, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__62 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__64 = paddle._C_ops.set_value_( + set_value__63, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__63 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__65 = paddle._C_ops.set_value_( + set_value__64, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__64 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__66 = paddle._C_ops.set_value_( + set_value__65, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__65 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__67 = paddle._C_ops.set_value_( + set_value__66, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__66 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__6 = paddle._C_ops.set_value_( + set_value__67, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__67 + + # pd_op.reshape: (1x2x12x2x12x1xf32) <- (1x24x24x1xf32, 6xi64) + reshape_290 = paddle._C_ops.reshape(set_value__6, full_int_array_64) + + # pd_op.transpose: (1x2x2x12x12x1xf32) <- (1x2x12x2x12x1xf32) + transpose_152 = paddle._C_ops.transpose(reshape_290, [0, 1, 3, 2, 4, 5]) + del reshape_290 + + # pd_op.reshape: (4x12x12x1xf32) <- (1x2x2x12x12x1xf32, 4xi64) + reshape_291 = paddle._C_ops.reshape(transpose_152, full_int_array_36) + del transpose_152 + + # pd_op.reshape: (4x144xf32) <- (4x12x12x1xf32, 2xi64) + reshape_292 = paddle._C_ops.reshape(reshape_291, full_int_array_37) + del reshape_291 + + # pd_op.unsqueeze: (4x1x144xf32) <- (4x144xf32, 1xi64) + unsqueeze_55 = paddle._C_ops.unsqueeze(reshape_292, full_int_array_1) + + # pd_op.unsqueeze: (4x144x1xf32) <- (4x144xf32, 1xi64) + unsqueeze_56 = paddle._C_ops.unsqueeze(reshape_292, full_int_array_2) + del reshape_292 + + # pd_op.subtract: (4x144x144xf32) <- (4x1x144xf32, 4x144x1xf32) + subtract_6 = paddle._C_ops.subtract(unsqueeze_55, unsqueeze_56) + del unsqueeze_55, unsqueeze_56 + + # pd_op.not_equal: (4x144x144xb) <- (4x144x144xf32, xf32) + not_equal_6 = paddle._C_ops.not_equal(subtract_6, full_26) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_12 = paddle._C_ops.where(not_equal_6, full_35, subtract_6) + del not_equal_6, subtract_6 + + # pd_op.equal: (4x144x144xb) <- (4x144x144xf32, xf32) + equal_6 = paddle._C_ops.equal(where_12, full_26) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_13 = paddle._C_ops.where(equal_6, full_36, where_12) + del equal_6, where_12 + + # pd_op.matmul: (16x144x2304xf32) <- (16x144x768xf32, 768x2304xf32) + matmul_67 = paddle._C_ops.matmul(reshape_117, parameter_136, False, False) + del parameter_136 + + # pd_op.add: (16x144x2304xf32) <- (16x144x2304xf32, 2304xf32) + add_91 = paddle._C_ops.add(matmul_67, parameter_135) + del parameter_135 + + # pd_op.reshape: (16x144x3x24x32xf32) <- (16x144x2304xf32, 5xi64) + reshape_293 = paddle._C_ops.reshape(add_91, full_int_array_59) + + # pd_op.transpose: (3x16x24x144x32xf32) <- (16x144x3x24x32xf32) + transpose_80 = paddle._C_ops.transpose(reshape_293, [2, 0, 3, 1, 4]) + del reshape_293 + + # pd_op.slice: (16x24x144x32xf32) <- (3x16x24x144x32xf32, 1xi64, 1xi64) + slice_50 = paddle._C_ops.slice( + transpose_80, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (16x24x144x32xf32) <- (3x16x24x144x32xf32, 1xi64, 1xi64) + slice_51 = paddle._C_ops.slice( + transpose_80, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (16x24x144x32xf32) <- (3x16x24x144x32xf32, 1xi64, 1xi64) + slice_13 = paddle._C_ops.slice( + transpose_80, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (16x24x144x32xf32) <- (16x24x144x32xf32, 1xf32) + scale_13 = paddle._C_ops.scale(slice_50, full_0, float("0"), True) + del slice_50 + + # pd_op.transpose: (16x24x32x144xf32) <- (16x24x144x32xf32) + transpose_81 = paddle._C_ops.transpose(slice_51, [0, 1, 3, 2]) + del slice_51 + + # pd_op.matmul: (16x24x144x144xf32) <- (16x24x144x32xf32, 16x24x32x144xf32) + matmul_68 = paddle._C_ops.matmul(scale_13, transpose_81, False, False) + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_118 = paddle._C_ops.reshape(data_47, full_int_array_15) + del data_47 + + # pd_op.index_select: (20736x24xf32) <- (529x24xf32, 20736xi64) + index_select_13 = paddle._C_ops.index_select(data_48, reshape_118, 0) + del data_48 + + # pd_op.reshape: (144x144x24xf32) <- (20736x24xf32, 3xi64) + reshape_294 = paddle._C_ops.reshape(index_select_13, full_int_array_16) + + # pd_op.transpose: (24x144x144xf32) <- (144x144x24xf32) + transpose_82 = paddle._C_ops.transpose(reshape_294, [2, 0, 1]) + del reshape_294 + + # pd_op.unsqueeze: (1x24x144x144xf32) <- (24x144x144xf32, 1xi64) + unsqueeze_19 = paddle._C_ops.unsqueeze(transpose_82, full_int_array_0) + + # pd_op.add: (16x24x144x144xf32) <- (16x24x144x144xf32, 1x24x144x144xf32) + add_92 = paddle._C_ops.add(matmul_68, unsqueeze_19) + + # pd_op.reshape: (4x4x24x144x144xf32) <- (16x24x144x144xf32, 5xi64) + reshape_119 = paddle._C_ops.reshape(add_92, full_int_array_65) + + # pd_op.unsqueeze: (4x1x144x144xf32) <- (4x144x144xf32, 1xi64) + unsqueeze_57 = paddle._C_ops.unsqueeze(where_13, full_int_array_1) + del where_13 + + # pd_op.unsqueeze: (1x4x1x144x144xf32) <- (4x1x144x144xf32, 1xi64) + unsqueeze_20 = paddle._C_ops.unsqueeze(unsqueeze_57, full_int_array_0) + del unsqueeze_57 + + # pd_op.add: (4x4x24x144x144xf32) <- (4x4x24x144x144xf32, 1x4x1x144x144xf32) + add_93 = paddle._C_ops.add(reshape_119, unsqueeze_20) + + # pd_op.reshape: (16x24x144x144xf32) <- (4x4x24x144x144xf32, 4xi64) + reshape_295 = paddle._C_ops.reshape(add_93, full_int_array_66) + + # pd_op.softmax: (16x24x144x144xf32) <- (16x24x144x144xf32) + softmax_13 = paddle._C_ops.softmax(reshape_295, -1) + del reshape_295 + + # pd_op.matmul: (16x24x144x32xf32) <- (16x24x144x144xf32, 16x24x144x32xf32) + matmul_137 = paddle._C_ops.matmul(softmax_13, slice_13, False, False) + + # pd_op.transpose: (16x144x24x32xf32) <- (16x24x144x32xf32) + transpose_83 = paddle._C_ops.transpose(matmul_137, [0, 2, 1, 3]) + del matmul_137 + + # pd_op.reshape: (16x144x768xf32) <- (16x144x24x32xf32, 3xi64) + reshape_120 = paddle._C_ops.reshape(transpose_83, full_int_array_60) + + # pd_op.matmul: (16x144x768xf32) <- (16x144x768xf32, 768x768xf32) + matmul_69 = paddle._C_ops.matmul(reshape_120, parameter_134, False, False) + del parameter_134 + + # pd_op.add: (16x144x768xf32) <- (16x144x768xf32, 768xf32) + add_94 = paddle._C_ops.add(matmul_69, parameter_133) + del parameter_133 + + # pd_op.reshape: (16x12x12x768xf32) <- (16x144x768xf32, 4xi64) + reshape_121 = paddle._C_ops.reshape(add_94, full_int_array_57) + + # pd_op.reshape: (4x2x2x12x12x768xf32) <- (16x12x12x768xf32, 6xi64) + reshape_296 = paddle._C_ops.reshape(reshape_121, full_int_array_61) + + # pd_op.transpose: (4x2x12x2x12x768xf32) <- (4x2x2x12x12x768xf32) + transpose_84 = paddle._C_ops.transpose(reshape_296, [0, 1, 3, 2, 4, 5]) + del reshape_296 + + # pd_op.reshape: (4x24x24x768xf32) <- (4x2x12x2x12x768xf32, 4xi64) + reshape_122 = paddle._C_ops.reshape(transpose_84, full_int_array_62) + + # pd_op.roll: (4x24x24x768xf32) <- (4x24x24x768xf32, 2xi64) + roll_13 = paddle._C_ops.roll(reshape_122, full_int_array_5, [1, 2]) + + # pd_op.reshape: (4x576x768xf32) <- (4x24x24x768xf32, 3xi64) + reshape_123 = paddle._C_ops.reshape(roll_13, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_14 = paddle._C_ops.full( + [], + float("0.943478"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_153 = full_14 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_24 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_201 = paddle._C_ops.add(full_14, uniform_24) + del uniform_24 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_24 = paddle._C_ops.floor(add_201) + del add_201 + + # pd_op.divide: (4x576x768xf32) <- (4x576x768xf32, xf32) + divide_24 = paddle._C_ops.divide(reshape_123, full_14) + + # pd_op.multiply: (4x576x768xf32) <- (4x576x768xf32, 4x1x1xf32) + multiply_24 = paddle._C_ops.multiply(divide_24, floor_24) + + # pd_op.add: (4x576x768xf32) <- (4x576x768xf32, 4x576x768xf32) + add_95 = paddle._C_ops.add(add_90, multiply_24) + + # pd_op.layer_norm: (4x576x768xf32, 4x576xf32, 4x576xf32) <- (4x576x768xf32, 768xf32, 768xf32) + layer_norm_90, layer_norm_91, layer_norm_92 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_95, parameter_132, parameter_131, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_131, parameter_132 + + # pd_op.matmul: (4x576x3072xf32) <- (4x576x768xf32, 768x3072xf32) + matmul_70 = paddle._C_ops.matmul(layer_norm_90, parameter_130, False, False) + del parameter_130 + + # pd_op.add: (4x576x3072xf32) <- (4x576x3072xf32, 3072xf32) + add_96 = paddle._C_ops.add(matmul_70, parameter_129) + del parameter_129 + + # pd_op.gelu: (4x576x3072xf32) <- (4x576x3072xf32) + gelu_13 = paddle._C_ops.gelu(add_96, False) + + # pd_op.matmul: (4x576x768xf32) <- (4x576x3072xf32, 3072x768xf32) + matmul_71 = paddle._C_ops.matmul(gelu_13, parameter_128, False, False) + del parameter_128 + + # pd_op.add: (4x576x768xf32) <- (4x576x768xf32, 768xf32) + add_97 = paddle._C_ops.add(matmul_71, parameter_127) + del parameter_127 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_25 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_202 = paddle._C_ops.add(full_14, uniform_25) + del uniform_25 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_25 = paddle._C_ops.floor(add_202) + del add_202 + + # pd_op.divide: (4x576x768xf32) <- (4x576x768xf32, xf32) + divide_25 = paddle._C_ops.divide(add_97, full_14) + + # pd_op.multiply: (4x576x768xf32) <- (4x576x768xf32, 4x1x1xf32) + multiply_25 = paddle._C_ops.multiply(divide_25, floor_25) + + # pd_op.add: (4x576x768xf32) <- (4x576x768xf32, 4x576x768xf32) + add_98 = paddle._C_ops.add(add_95, multiply_25) + + # pd_op.layer_norm: (4x576x768xf32, 4x576xf32, 4x576xf32) <- (4x576x768xf32, 768xf32, 768xf32) + layer_norm_93, layer_norm_94, layer_norm_95 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_98, parameter_126, parameter_125, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_125, parameter_126 + + # pd_op.reshape: (4x24x24x768xf32) <- (4x576x768xf32, 4xi64) + reshape_124 = paddle._C_ops.reshape(layer_norm_93, full_int_array_55) + + # pd_op.reshape: (4x2x12x2x12x768xf32) <- (4x24x24x768xf32, 6xi64) + reshape_297 = paddle._C_ops.reshape(reshape_124, full_int_array_56) + + # pd_op.transpose: (4x2x2x12x12x768xf32) <- (4x2x12x2x12x768xf32) + transpose_85 = paddle._C_ops.transpose(reshape_297, [0, 1, 3, 2, 4, 5]) + del reshape_297 + + # pd_op.reshape: (16x12x12x768xf32) <- (4x2x2x12x12x768xf32, 4xi64) + reshape_125 = paddle._C_ops.reshape(transpose_85, full_int_array_57) + + # pd_op.reshape: (16x144x768xf32) <- (16x12x12x768xf32, 3xi64) + reshape_126 = paddle._C_ops.reshape(reshape_125, full_int_array_58) + + # pd_op.matmul: (16x144x2304xf32) <- (16x144x768xf32, 768x2304xf32) + matmul_72 = paddle._C_ops.matmul(reshape_126, parameter_124, False, False) + del parameter_124 + + # pd_op.add: (16x144x2304xf32) <- (16x144x2304xf32, 2304xf32) + add_99 = paddle._C_ops.add(matmul_72, parameter_123) + del parameter_123 + + # pd_op.reshape: (16x144x3x24x32xf32) <- (16x144x2304xf32, 5xi64) + reshape_298 = paddle._C_ops.reshape(add_99, full_int_array_59) + + # pd_op.transpose: (3x16x24x144x32xf32) <- (16x144x3x24x32xf32) + transpose_86 = paddle._C_ops.transpose(reshape_298, [2, 0, 3, 1, 4]) + del reshape_298 + + # pd_op.slice: (16x24x144x32xf32) <- (3x16x24x144x32xf32, 1xi64, 1xi64) + slice_52 = paddle._C_ops.slice( + transpose_86, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (16x24x144x32xf32) <- (3x16x24x144x32xf32, 1xi64, 1xi64) + slice_53 = paddle._C_ops.slice( + transpose_86, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (16x24x144x32xf32) <- (3x16x24x144x32xf32, 1xi64, 1xi64) + slice_14 = paddle._C_ops.slice( + transpose_86, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (16x24x144x32xf32) <- (16x24x144x32xf32, 1xf32) + scale_14 = paddle._C_ops.scale(slice_52, full_0, float("0"), True) + del slice_52 + + # pd_op.transpose: (16x24x32x144xf32) <- (16x24x144x32xf32) + transpose_87 = paddle._C_ops.transpose(slice_53, [0, 1, 3, 2]) + del slice_53 + + # pd_op.matmul: (16x24x144x144xf32) <- (16x24x144x32xf32, 16x24x32x144xf32) + matmul_73 = paddle._C_ops.matmul(scale_14, transpose_87, False, False) + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_127 = paddle._C_ops.reshape(data_1, full_int_array_15) + del data_1 + + # pd_op.index_select: (20736x24xf32) <- (529x24xf32, 20736xi64) + index_select_14 = paddle._C_ops.index_select(data_2, reshape_127, 0) + del data_2 + + # pd_op.reshape: (144x144x24xf32) <- (20736x24xf32, 3xi64) + reshape_299 = paddle._C_ops.reshape(index_select_14, full_int_array_16) + + # pd_op.transpose: (24x144x144xf32) <- (144x144x24xf32) + transpose_88 = paddle._C_ops.transpose(reshape_299, [2, 0, 1]) + del reshape_299 + + # pd_op.unsqueeze: (1x24x144x144xf32) <- (24x144x144xf32, 1xi64) + unsqueeze_21 = paddle._C_ops.unsqueeze(transpose_88, full_int_array_0) + + # pd_op.add: (16x24x144x144xf32) <- (16x24x144x144xf32, 1x24x144x144xf32) + add_203 = paddle._C_ops.add(matmul_73, unsqueeze_21) + + # pd_op.softmax: (16x24x144x144xf32) <- (16x24x144x144xf32) + softmax_14 = paddle._C_ops.softmax(add_203, -1) + del add_203 + + # pd_op.matmul: (16x24x144x32xf32) <- (16x24x144x144xf32, 16x24x144x32xf32) + matmul_138 = paddle._C_ops.matmul(softmax_14, slice_14, False, False) + + # pd_op.transpose: (16x144x24x32xf32) <- (16x24x144x32xf32) + transpose_89 = paddle._C_ops.transpose(matmul_138, [0, 2, 1, 3]) + del matmul_138 + + # pd_op.reshape: (16x144x768xf32) <- (16x144x24x32xf32, 3xi64) + reshape_128 = paddle._C_ops.reshape(transpose_89, full_int_array_60) + + # pd_op.matmul: (16x144x768xf32) <- (16x144x768xf32, 768x768xf32) + matmul_74 = paddle._C_ops.matmul(reshape_128, parameter_122, False, False) + del parameter_122 + + # pd_op.add: (16x144x768xf32) <- (16x144x768xf32, 768xf32) + add_100 = paddle._C_ops.add(matmul_74, parameter_121) + del parameter_121 + + # pd_op.reshape: (16x12x12x768xf32) <- (16x144x768xf32, 4xi64) + reshape_129 = paddle._C_ops.reshape(add_100, full_int_array_57) + + # pd_op.reshape: (4x2x2x12x12x768xf32) <- (16x12x12x768xf32, 6xi64) + reshape_300 = paddle._C_ops.reshape(reshape_129, full_int_array_61) + + # pd_op.transpose: (4x2x12x2x12x768xf32) <- (4x2x2x12x12x768xf32) + transpose_90 = paddle._C_ops.transpose(reshape_300, [0, 1, 3, 2, 4, 5]) + del reshape_300 + + # pd_op.reshape: (4x24x24x768xf32) <- (4x2x12x2x12x768xf32, 4xi64) + reshape_130 = paddle._C_ops.reshape(transpose_90, full_int_array_62) + + # pd_op.reshape: (4x576x768xf32) <- (4x24x24x768xf32, 3xi64) + reshape_131 = paddle._C_ops.reshape(reshape_130, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_15 = paddle._C_ops.full( + [], + float("0.93913"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_162 = full_15 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_26 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_204 = paddle._C_ops.add(full_15, uniform_26) + del uniform_26 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_26 = paddle._C_ops.floor(add_204) + del add_204 + + # pd_op.divide: (4x576x768xf32) <- (4x576x768xf32, xf32) + divide_26 = paddle._C_ops.divide(reshape_131, full_15) + + # pd_op.multiply: (4x576x768xf32) <- (4x576x768xf32, 4x1x1xf32) + multiply_26 = paddle._C_ops.multiply(divide_26, floor_26) + + # pd_op.add: (4x576x768xf32) <- (4x576x768xf32, 4x576x768xf32) + add_101 = paddle._C_ops.add(add_98, multiply_26) + + # pd_op.layer_norm: (4x576x768xf32, 4x576xf32, 4x576xf32) <- (4x576x768xf32, 768xf32, 768xf32) + layer_norm_96, layer_norm_97, layer_norm_98 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_101, parameter_120, parameter_119, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_119, parameter_120 + + # pd_op.matmul: (4x576x3072xf32) <- (4x576x768xf32, 768x3072xf32) + matmul_75 = paddle._C_ops.matmul(layer_norm_96, parameter_118, False, False) + del parameter_118 + + # pd_op.add: (4x576x3072xf32) <- (4x576x3072xf32, 3072xf32) + add_102 = paddle._C_ops.add(matmul_75, parameter_117) + del parameter_117 + + # pd_op.gelu: (4x576x3072xf32) <- (4x576x3072xf32) + gelu_14 = paddle._C_ops.gelu(add_102, False) + + # pd_op.matmul: (4x576x768xf32) <- (4x576x3072xf32, 3072x768xf32) + matmul_76 = paddle._C_ops.matmul(gelu_14, parameter_116, False, False) + del parameter_116 + + # pd_op.add: (4x576x768xf32) <- (4x576x768xf32, 768xf32) + add_103 = paddle._C_ops.add(matmul_76, parameter_115) + del parameter_115 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_27 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_205 = paddle._C_ops.add(full_15, uniform_27) + del uniform_27 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_27 = paddle._C_ops.floor(add_205) + del add_205 + + # pd_op.divide: (4x576x768xf32) <- (4x576x768xf32, xf32) + divide_27 = paddle._C_ops.divide(add_103, full_15) + + # pd_op.multiply: (4x576x768xf32) <- (4x576x768xf32, 4x1x1xf32) + multiply_27 = paddle._C_ops.multiply(divide_27, floor_27) + + # pd_op.add: (4x576x768xf32) <- (4x576x768xf32, 4x576x768xf32) + add_104 = paddle._C_ops.add(add_101, multiply_27) + + # pd_op.layer_norm: (4x576x768xf32, 4x576xf32, 4x576xf32) <- (4x576x768xf32, 768xf32, 768xf32) + layer_norm_99, layer_norm_100, layer_norm_101 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_104, parameter_114, parameter_113, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_113, parameter_114 + + # pd_op.reshape: (4x24x24x768xf32) <- (4x576x768xf32, 4xi64) + reshape_132 = paddle._C_ops.reshape(layer_norm_99, full_int_array_55) + + # pd_op.roll: (4x24x24x768xf32) <- (4x24x24x768xf32, 2xi64) + roll_14 = paddle._C_ops.roll(reshape_132, full_int_array_4, [1, 2]) + + # pd_op.reshape: (4x2x12x2x12x768xf32) <- (4x24x24x768xf32, 6xi64) + reshape_301 = paddle._C_ops.reshape(roll_14, full_int_array_56) + + # pd_op.transpose: (4x2x2x12x12x768xf32) <- (4x2x12x2x12x768xf32) + transpose_91 = paddle._C_ops.transpose(reshape_301, [0, 1, 3, 2, 4, 5]) + del reshape_301 + + # pd_op.reshape: (16x12x12x768xf32) <- (4x2x2x12x12x768xf32, 4xi64) + reshape_133 = paddle._C_ops.reshape(transpose_91, full_int_array_57) + + # pd_op.reshape: (16x144x768xf32) <- (16x12x12x768xf32, 3xi64) + reshape_134 = paddle._C_ops.reshape(reshape_133, full_int_array_58) + + # pd_op.full: (1x24x24x1xf32) <- () + full_41 = paddle._C_ops.full( + [1, 24, 24, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__68 = paddle._C_ops.set_value_( + full_41, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_41 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__69 = paddle._C_ops.set_value_( + set_value__68, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__68 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__70 = paddle._C_ops.set_value_( + set_value__69, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__69 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__71 = paddle._C_ops.set_value_( + set_value__70, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__70 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__72 = paddle._C_ops.set_value_( + set_value__71, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__71 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__73 = paddle._C_ops.set_value_( + set_value__72, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__72 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__74 = paddle._C_ops.set_value_( + set_value__73, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__73 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__75 = paddle._C_ops.set_value_( + set_value__74, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__74 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__7 = paddle._C_ops.set_value_( + set_value__75, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__75 + + # pd_op.reshape: (1x2x12x2x12x1xf32) <- (1x24x24x1xf32, 6xi64) + reshape_302 = paddle._C_ops.reshape(set_value__7, full_int_array_64) + + # pd_op.transpose: (1x2x2x12x12x1xf32) <- (1x2x12x2x12x1xf32) + transpose_153 = paddle._C_ops.transpose(reshape_302, [0, 1, 3, 2, 4, 5]) + del reshape_302 + + # pd_op.reshape: (4x12x12x1xf32) <- (1x2x2x12x12x1xf32, 4xi64) + reshape_303 = paddle._C_ops.reshape(transpose_153, full_int_array_36) + del transpose_153 + + # pd_op.reshape: (4x144xf32) <- (4x12x12x1xf32, 2xi64) + reshape_304 = paddle._C_ops.reshape(reshape_303, full_int_array_37) + del reshape_303 + + # pd_op.unsqueeze: (4x1x144xf32) <- (4x144xf32, 1xi64) + unsqueeze_58 = paddle._C_ops.unsqueeze(reshape_304, full_int_array_1) + + # pd_op.unsqueeze: (4x144x1xf32) <- (4x144xf32, 1xi64) + unsqueeze_59 = paddle._C_ops.unsqueeze(reshape_304, full_int_array_2) + del reshape_304 + + # pd_op.subtract: (4x144x144xf32) <- (4x1x144xf32, 4x144x1xf32) + subtract_7 = paddle._C_ops.subtract(unsqueeze_58, unsqueeze_59) + del unsqueeze_58, unsqueeze_59 + + # pd_op.not_equal: (4x144x144xb) <- (4x144x144xf32, xf32) + not_equal_7 = paddle._C_ops.not_equal(subtract_7, full_26) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_14 = paddle._C_ops.where(not_equal_7, full_35, subtract_7) + del not_equal_7, subtract_7 + + # pd_op.equal: (4x144x144xb) <- (4x144x144xf32, xf32) + equal_7 = paddle._C_ops.equal(where_14, full_26) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_15 = paddle._C_ops.where(equal_7, full_36, where_14) + del equal_7, where_14 + + # pd_op.matmul: (16x144x2304xf32) <- (16x144x768xf32, 768x2304xf32) + matmul_77 = paddle._C_ops.matmul(reshape_134, parameter_112, False, False) + del parameter_112 + + # pd_op.add: (16x144x2304xf32) <- (16x144x2304xf32, 2304xf32) + add_105 = paddle._C_ops.add(matmul_77, parameter_111) + del parameter_111 + + # pd_op.reshape: (16x144x3x24x32xf32) <- (16x144x2304xf32, 5xi64) + reshape_305 = paddle._C_ops.reshape(add_105, full_int_array_59) + + # pd_op.transpose: (3x16x24x144x32xf32) <- (16x144x3x24x32xf32) + transpose_92 = paddle._C_ops.transpose(reshape_305, [2, 0, 3, 1, 4]) + del reshape_305 + + # pd_op.slice: (16x24x144x32xf32) <- (3x16x24x144x32xf32, 1xi64, 1xi64) + slice_54 = paddle._C_ops.slice( + transpose_92, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (16x24x144x32xf32) <- (3x16x24x144x32xf32, 1xi64, 1xi64) + slice_55 = paddle._C_ops.slice( + transpose_92, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (16x24x144x32xf32) <- (3x16x24x144x32xf32, 1xi64, 1xi64) + slice_15 = paddle._C_ops.slice( + transpose_92, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (16x24x144x32xf32) <- (16x24x144x32xf32, 1xf32) + scale_15 = paddle._C_ops.scale(slice_54, full_0, float("0"), True) + del slice_54 + + # pd_op.transpose: (16x24x32x144xf32) <- (16x24x144x32xf32) + transpose_93 = paddle._C_ops.transpose(slice_55, [0, 1, 3, 2]) + del slice_55 + + # pd_op.matmul: (16x24x144x144xf32) <- (16x24x144x32xf32, 16x24x32x144xf32) + matmul_78 = paddle._C_ops.matmul(scale_15, transpose_93, False, False) + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_135 = paddle._C_ops.reshape(data_3, full_int_array_15) + del data_3 + + # pd_op.index_select: (20736x24xf32) <- (529x24xf32, 20736xi64) + index_select_15 = paddle._C_ops.index_select(data_4, reshape_135, 0) + del data_4 + + # pd_op.reshape: (144x144x24xf32) <- (20736x24xf32, 3xi64) + reshape_306 = paddle._C_ops.reshape(index_select_15, full_int_array_16) + + # pd_op.transpose: (24x144x144xf32) <- (144x144x24xf32) + transpose_94 = paddle._C_ops.transpose(reshape_306, [2, 0, 1]) + del reshape_306 + + # pd_op.unsqueeze: (1x24x144x144xf32) <- (24x144x144xf32, 1xi64) + unsqueeze_22 = paddle._C_ops.unsqueeze(transpose_94, full_int_array_0) + + # pd_op.add: (16x24x144x144xf32) <- (16x24x144x144xf32, 1x24x144x144xf32) + add_106 = paddle._C_ops.add(matmul_78, unsqueeze_22) + + # pd_op.reshape: (4x4x24x144x144xf32) <- (16x24x144x144xf32, 5xi64) + reshape_136 = paddle._C_ops.reshape(add_106, full_int_array_65) + + # pd_op.unsqueeze: (4x1x144x144xf32) <- (4x144x144xf32, 1xi64) + unsqueeze_60 = paddle._C_ops.unsqueeze(where_15, full_int_array_1) + del where_15 + + # pd_op.unsqueeze: (1x4x1x144x144xf32) <- (4x1x144x144xf32, 1xi64) + unsqueeze_23 = paddle._C_ops.unsqueeze(unsqueeze_60, full_int_array_0) + del unsqueeze_60 + + # pd_op.add: (4x4x24x144x144xf32) <- (4x4x24x144x144xf32, 1x4x1x144x144xf32) + add_107 = paddle._C_ops.add(reshape_136, unsqueeze_23) + + # pd_op.reshape: (16x24x144x144xf32) <- (4x4x24x144x144xf32, 4xi64) + reshape_307 = paddle._C_ops.reshape(add_107, full_int_array_66) + + # pd_op.softmax: (16x24x144x144xf32) <- (16x24x144x144xf32) + softmax_15 = paddle._C_ops.softmax(reshape_307, -1) + del reshape_307 + + # pd_op.matmul: (16x24x144x32xf32) <- (16x24x144x144xf32, 16x24x144x32xf32) + matmul_139 = paddle._C_ops.matmul(softmax_15, slice_15, False, False) + + # pd_op.transpose: (16x144x24x32xf32) <- (16x24x144x32xf32) + transpose_95 = paddle._C_ops.transpose(matmul_139, [0, 2, 1, 3]) + del matmul_139 + + # pd_op.reshape: (16x144x768xf32) <- (16x144x24x32xf32, 3xi64) + reshape_137 = paddle._C_ops.reshape(transpose_95, full_int_array_60) + + # pd_op.matmul: (16x144x768xf32) <- (16x144x768xf32, 768x768xf32) + matmul_79 = paddle._C_ops.matmul(reshape_137, parameter_110, False, False) + del parameter_110 + + # pd_op.add: (16x144x768xf32) <- (16x144x768xf32, 768xf32) + add_108 = paddle._C_ops.add(matmul_79, parameter_109) + del parameter_109 + + # pd_op.reshape: (16x12x12x768xf32) <- (16x144x768xf32, 4xi64) + reshape_138 = paddle._C_ops.reshape(add_108, full_int_array_57) + + # pd_op.reshape: (4x2x2x12x12x768xf32) <- (16x12x12x768xf32, 6xi64) + reshape_308 = paddle._C_ops.reshape(reshape_138, full_int_array_61) + + # pd_op.transpose: (4x2x12x2x12x768xf32) <- (4x2x2x12x12x768xf32) + transpose_96 = paddle._C_ops.transpose(reshape_308, [0, 1, 3, 2, 4, 5]) + del reshape_308 + + # pd_op.reshape: (4x24x24x768xf32) <- (4x2x12x2x12x768xf32, 4xi64) + reshape_139 = paddle._C_ops.reshape(transpose_96, full_int_array_62) + + # pd_op.roll: (4x24x24x768xf32) <- (4x24x24x768xf32, 2xi64) + roll_15 = paddle._C_ops.roll(reshape_139, full_int_array_5, [1, 2]) + + # pd_op.reshape: (4x576x768xf32) <- (4x24x24x768xf32, 3xi64) + reshape_140 = paddle._C_ops.reshape(roll_15, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_16 = paddle._C_ops.full( + [], + float("0.934783"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_173 = full_16 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_28 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_206 = paddle._C_ops.add(full_16, uniform_28) + del uniform_28 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_28 = paddle._C_ops.floor(add_206) + del add_206 + + # pd_op.divide: (4x576x768xf32) <- (4x576x768xf32, xf32) + divide_28 = paddle._C_ops.divide(reshape_140, full_16) + + # pd_op.multiply: (4x576x768xf32) <- (4x576x768xf32, 4x1x1xf32) + multiply_28 = paddle._C_ops.multiply(divide_28, floor_28) + + # pd_op.add: (4x576x768xf32) <- (4x576x768xf32, 4x576x768xf32) + add_109 = paddle._C_ops.add(add_104, multiply_28) + + # pd_op.layer_norm: (4x576x768xf32, 4x576xf32, 4x576xf32) <- (4x576x768xf32, 768xf32, 768xf32) + layer_norm_102, layer_norm_103, layer_norm_104 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_109, parameter_108, parameter_107, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_107, parameter_108 + + # pd_op.matmul: (4x576x3072xf32) <- (4x576x768xf32, 768x3072xf32) + matmul_80 = paddle._C_ops.matmul(layer_norm_102, parameter_106, False, False) + del parameter_106 + + # pd_op.add: (4x576x3072xf32) <- (4x576x3072xf32, 3072xf32) + add_110 = paddle._C_ops.add(matmul_80, parameter_105) + del parameter_105 + + # pd_op.gelu: (4x576x3072xf32) <- (4x576x3072xf32) + gelu_15 = paddle._C_ops.gelu(add_110, False) + + # pd_op.matmul: (4x576x768xf32) <- (4x576x3072xf32, 3072x768xf32) + matmul_81 = paddle._C_ops.matmul(gelu_15, parameter_104, False, False) + del parameter_104 + + # pd_op.add: (4x576x768xf32) <- (4x576x768xf32, 768xf32) + add_111 = paddle._C_ops.add(matmul_81, parameter_103) + del parameter_103 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_29 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_207 = paddle._C_ops.add(full_16, uniform_29) + del uniform_29 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_29 = paddle._C_ops.floor(add_207) + del add_207 + + # pd_op.divide: (4x576x768xf32) <- (4x576x768xf32, xf32) + divide_29 = paddle._C_ops.divide(add_111, full_16) + + # pd_op.multiply: (4x576x768xf32) <- (4x576x768xf32, 4x1x1xf32) + multiply_29 = paddle._C_ops.multiply(divide_29, floor_29) + + # pd_op.add: (4x576x768xf32) <- (4x576x768xf32, 4x576x768xf32) + add_112 = paddle._C_ops.add(add_109, multiply_29) + + # pd_op.layer_norm: (4x576x768xf32, 4x576xf32, 4x576xf32) <- (4x576x768xf32, 768xf32, 768xf32) + layer_norm_105, layer_norm_106, layer_norm_107 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_112, parameter_102, parameter_101, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_101, parameter_102 + + # pd_op.reshape: (4x24x24x768xf32) <- (4x576x768xf32, 4xi64) + reshape_141 = paddle._C_ops.reshape(layer_norm_105, full_int_array_55) + + # pd_op.reshape: (4x2x12x2x12x768xf32) <- (4x24x24x768xf32, 6xi64) + reshape_309 = paddle._C_ops.reshape(reshape_141, full_int_array_56) + + # pd_op.transpose: (4x2x2x12x12x768xf32) <- (4x2x12x2x12x768xf32) + transpose_97 = paddle._C_ops.transpose(reshape_309, [0, 1, 3, 2, 4, 5]) + del reshape_309 + + # pd_op.reshape: (16x12x12x768xf32) <- (4x2x2x12x12x768xf32, 4xi64) + reshape_142 = paddle._C_ops.reshape(transpose_97, full_int_array_57) + + # pd_op.reshape: (16x144x768xf32) <- (16x12x12x768xf32, 3xi64) + reshape_143 = paddle._C_ops.reshape(reshape_142, full_int_array_58) + + # pd_op.matmul: (16x144x2304xf32) <- (16x144x768xf32, 768x2304xf32) + matmul_82 = paddle._C_ops.matmul(reshape_143, parameter_100, False, False) + del parameter_100 + + # pd_op.add: (16x144x2304xf32) <- (16x144x2304xf32, 2304xf32) + add_113 = paddle._C_ops.add(matmul_82, parameter_99) + del parameter_99 + + # pd_op.reshape: (16x144x3x24x32xf32) <- (16x144x2304xf32, 5xi64) + reshape_310 = paddle._C_ops.reshape(add_113, full_int_array_59) + + # pd_op.transpose: (3x16x24x144x32xf32) <- (16x144x3x24x32xf32) + transpose_98 = paddle._C_ops.transpose(reshape_310, [2, 0, 3, 1, 4]) + del reshape_310 + + # pd_op.slice: (16x24x144x32xf32) <- (3x16x24x144x32xf32, 1xi64, 1xi64) + slice_56 = paddle._C_ops.slice( + transpose_98, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (16x24x144x32xf32) <- (3x16x24x144x32xf32, 1xi64, 1xi64) + slice_57 = paddle._C_ops.slice( + transpose_98, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (16x24x144x32xf32) <- (3x16x24x144x32xf32, 1xi64, 1xi64) + slice_16 = paddle._C_ops.slice( + transpose_98, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (16x24x144x32xf32) <- (16x24x144x32xf32, 1xf32) + scale_16 = paddle._C_ops.scale(slice_56, full_0, float("0"), True) + del slice_56 + + # pd_op.transpose: (16x24x32x144xf32) <- (16x24x144x32xf32) + transpose_99 = paddle._C_ops.transpose(slice_57, [0, 1, 3, 2]) + del slice_57 + + # pd_op.matmul: (16x24x144x144xf32) <- (16x24x144x32xf32, 16x24x32x144xf32) + matmul_83 = paddle._C_ops.matmul(scale_16, transpose_99, False, False) + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_144 = paddle._C_ops.reshape(data_5, full_int_array_15) + del data_5 + + # pd_op.index_select: (20736x24xf32) <- (529x24xf32, 20736xi64) + index_select_16 = paddle._C_ops.index_select(data_6, reshape_144, 0) + del data_6 + + # pd_op.reshape: (144x144x24xf32) <- (20736x24xf32, 3xi64) + reshape_311 = paddle._C_ops.reshape(index_select_16, full_int_array_16) + + # pd_op.transpose: (24x144x144xf32) <- (144x144x24xf32) + transpose_100 = paddle._C_ops.transpose(reshape_311, [2, 0, 1]) + del reshape_311 + + # pd_op.unsqueeze: (1x24x144x144xf32) <- (24x144x144xf32, 1xi64) + unsqueeze_24 = paddle._C_ops.unsqueeze(transpose_100, full_int_array_0) + + # pd_op.add: (16x24x144x144xf32) <- (16x24x144x144xf32, 1x24x144x144xf32) + add_208 = paddle._C_ops.add(matmul_83, unsqueeze_24) + + # pd_op.softmax: (16x24x144x144xf32) <- (16x24x144x144xf32) + softmax_16 = paddle._C_ops.softmax(add_208, -1) + del add_208 + + # pd_op.matmul: (16x24x144x32xf32) <- (16x24x144x144xf32, 16x24x144x32xf32) + matmul_140 = paddle._C_ops.matmul(softmax_16, slice_16, False, False) + + # pd_op.transpose: (16x144x24x32xf32) <- (16x24x144x32xf32) + transpose_101 = paddle._C_ops.transpose(matmul_140, [0, 2, 1, 3]) + del matmul_140 + + # pd_op.reshape: (16x144x768xf32) <- (16x144x24x32xf32, 3xi64) + reshape_145 = paddle._C_ops.reshape(transpose_101, full_int_array_60) + + # pd_op.matmul: (16x144x768xf32) <- (16x144x768xf32, 768x768xf32) + matmul_84 = paddle._C_ops.matmul(reshape_145, parameter_98, False, False) + del parameter_98 + + # pd_op.add: (16x144x768xf32) <- (16x144x768xf32, 768xf32) + add_114 = paddle._C_ops.add(matmul_84, parameter_97) + del parameter_97 + + # pd_op.reshape: (16x12x12x768xf32) <- (16x144x768xf32, 4xi64) + reshape_146 = paddle._C_ops.reshape(add_114, full_int_array_57) + + # pd_op.reshape: (4x2x2x12x12x768xf32) <- (16x12x12x768xf32, 6xi64) + reshape_312 = paddle._C_ops.reshape(reshape_146, full_int_array_61) + + # pd_op.transpose: (4x2x12x2x12x768xf32) <- (4x2x2x12x12x768xf32) + transpose_102 = paddle._C_ops.transpose(reshape_312, [0, 1, 3, 2, 4, 5]) + del reshape_312 + + # pd_op.reshape: (4x24x24x768xf32) <- (4x2x12x2x12x768xf32, 4xi64) + reshape_147 = paddle._C_ops.reshape(transpose_102, full_int_array_62) + + # pd_op.reshape: (4x576x768xf32) <- (4x24x24x768xf32, 3xi64) + reshape_148 = paddle._C_ops.reshape(reshape_147, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_17 = paddle._C_ops.full( + [], + float("0.930435"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_182 = full_17 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_30 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_209 = paddle._C_ops.add(full_17, uniform_30) + del uniform_30 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_30 = paddle._C_ops.floor(add_209) + del add_209 + + # pd_op.divide: (4x576x768xf32) <- (4x576x768xf32, xf32) + divide_30 = paddle._C_ops.divide(reshape_148, full_17) + + # pd_op.multiply: (4x576x768xf32) <- (4x576x768xf32, 4x1x1xf32) + multiply_30 = paddle._C_ops.multiply(divide_30, floor_30) + + # pd_op.add: (4x576x768xf32) <- (4x576x768xf32, 4x576x768xf32) + add_115 = paddle._C_ops.add(add_112, multiply_30) + + # pd_op.layer_norm: (4x576x768xf32, 4x576xf32, 4x576xf32) <- (4x576x768xf32, 768xf32, 768xf32) + layer_norm_108, layer_norm_109, layer_norm_110 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_115, parameter_96, parameter_95, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_95, parameter_96 + + # pd_op.matmul: (4x576x3072xf32) <- (4x576x768xf32, 768x3072xf32) + matmul_85 = paddle._C_ops.matmul(layer_norm_108, parameter_94, False, False) + del parameter_94 + + # pd_op.add: (4x576x3072xf32) <- (4x576x3072xf32, 3072xf32) + add_116 = paddle._C_ops.add(matmul_85, parameter_93) + del parameter_93 + + # pd_op.gelu: (4x576x3072xf32) <- (4x576x3072xf32) + gelu_16 = paddle._C_ops.gelu(add_116, False) + + # pd_op.matmul: (4x576x768xf32) <- (4x576x3072xf32, 3072x768xf32) + matmul_86 = paddle._C_ops.matmul(gelu_16, parameter_92, False, False) + del parameter_92 + + # pd_op.add: (4x576x768xf32) <- (4x576x768xf32, 768xf32) + add_117 = paddle._C_ops.add(matmul_86, parameter_91) + del parameter_91 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_31 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_210 = paddle._C_ops.add(full_17, uniform_31) + del uniform_31 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_31 = paddle._C_ops.floor(add_210) + del add_210 + + # pd_op.divide: (4x576x768xf32) <- (4x576x768xf32, xf32) + divide_31 = paddle._C_ops.divide(add_117, full_17) + + # pd_op.multiply: (4x576x768xf32) <- (4x576x768xf32, 4x1x1xf32) + multiply_31 = paddle._C_ops.multiply(divide_31, floor_31) + + # pd_op.add: (4x576x768xf32) <- (4x576x768xf32, 4x576x768xf32) + add_118 = paddle._C_ops.add(add_115, multiply_31) + + # pd_op.layer_norm: (4x576x768xf32, 4x576xf32, 4x576xf32) <- (4x576x768xf32, 768xf32, 768xf32) + layer_norm_111, layer_norm_112, layer_norm_113 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_118, parameter_90, parameter_89, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_89, parameter_90 + + # pd_op.reshape: (4x24x24x768xf32) <- (4x576x768xf32, 4xi64) + reshape_149 = paddle._C_ops.reshape(layer_norm_111, full_int_array_55) + + # pd_op.roll: (4x24x24x768xf32) <- (4x24x24x768xf32, 2xi64) + roll_16 = paddle._C_ops.roll(reshape_149, full_int_array_4, [1, 2]) + + # pd_op.reshape: (4x2x12x2x12x768xf32) <- (4x24x24x768xf32, 6xi64) + reshape_313 = paddle._C_ops.reshape(roll_16, full_int_array_56) + + # pd_op.transpose: (4x2x2x12x12x768xf32) <- (4x2x12x2x12x768xf32) + transpose_103 = paddle._C_ops.transpose(reshape_313, [0, 1, 3, 2, 4, 5]) + del reshape_313 + + # pd_op.reshape: (16x12x12x768xf32) <- (4x2x2x12x12x768xf32, 4xi64) + reshape_150 = paddle._C_ops.reshape(transpose_103, full_int_array_57) + + # pd_op.reshape: (16x144x768xf32) <- (16x12x12x768xf32, 3xi64) + reshape_151 = paddle._C_ops.reshape(reshape_150, full_int_array_58) + + # pd_op.full: (1x24x24x1xf32) <- () + full_42 = paddle._C_ops.full( + [1, 24, 24, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__76 = paddle._C_ops.set_value_( + full_42, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_42 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__77 = paddle._C_ops.set_value_( + set_value__76, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__76 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__78 = paddle._C_ops.set_value_( + set_value__77, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__77 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__79 = paddle._C_ops.set_value_( + set_value__78, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__78 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__80 = paddle._C_ops.set_value_( + set_value__79, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__79 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__81 = paddle._C_ops.set_value_( + set_value__80, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__80 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__82 = paddle._C_ops.set_value_( + set_value__81, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__81 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__83 = paddle._C_ops.set_value_( + set_value__82, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__82 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__8 = paddle._C_ops.set_value_( + set_value__83, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__83 + + # pd_op.reshape: (1x2x12x2x12x1xf32) <- (1x24x24x1xf32, 6xi64) + reshape_314 = paddle._C_ops.reshape(set_value__8, full_int_array_64) + + # pd_op.transpose: (1x2x2x12x12x1xf32) <- (1x2x12x2x12x1xf32) + transpose_154 = paddle._C_ops.transpose(reshape_314, [0, 1, 3, 2, 4, 5]) + del reshape_314 + + # pd_op.reshape: (4x12x12x1xf32) <- (1x2x2x12x12x1xf32, 4xi64) + reshape_315 = paddle._C_ops.reshape(transpose_154, full_int_array_36) + del transpose_154 + + # pd_op.reshape: (4x144xf32) <- (4x12x12x1xf32, 2xi64) + reshape_316 = paddle._C_ops.reshape(reshape_315, full_int_array_37) + del reshape_315 + + # pd_op.unsqueeze: (4x1x144xf32) <- (4x144xf32, 1xi64) + unsqueeze_61 = paddle._C_ops.unsqueeze(reshape_316, full_int_array_1) + + # pd_op.unsqueeze: (4x144x1xf32) <- (4x144xf32, 1xi64) + unsqueeze_62 = paddle._C_ops.unsqueeze(reshape_316, full_int_array_2) + del reshape_316 + + # pd_op.subtract: (4x144x144xf32) <- (4x1x144xf32, 4x144x1xf32) + subtract_8 = paddle._C_ops.subtract(unsqueeze_61, unsqueeze_62) + del unsqueeze_61, unsqueeze_62 + + # pd_op.not_equal: (4x144x144xb) <- (4x144x144xf32, xf32) + not_equal_8 = paddle._C_ops.not_equal(subtract_8, full_26) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_16 = paddle._C_ops.where(not_equal_8, full_35, subtract_8) + del not_equal_8, subtract_8 + + # pd_op.equal: (4x144x144xb) <- (4x144x144xf32, xf32) + equal_8 = paddle._C_ops.equal(where_16, full_26) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_17 = paddle._C_ops.where(equal_8, full_36, where_16) + del equal_8, where_16 + + # pd_op.matmul: (16x144x2304xf32) <- (16x144x768xf32, 768x2304xf32) + matmul_87 = paddle._C_ops.matmul(reshape_151, parameter_88, False, False) + del parameter_88 + + # pd_op.add: (16x144x2304xf32) <- (16x144x2304xf32, 2304xf32) + add_119 = paddle._C_ops.add(matmul_87, parameter_87) + del parameter_87 + + # pd_op.reshape: (16x144x3x24x32xf32) <- (16x144x2304xf32, 5xi64) + reshape_317 = paddle._C_ops.reshape(add_119, full_int_array_59) + + # pd_op.transpose: (3x16x24x144x32xf32) <- (16x144x3x24x32xf32) + transpose_104 = paddle._C_ops.transpose(reshape_317, [2, 0, 3, 1, 4]) + del reshape_317 + + # pd_op.slice: (16x24x144x32xf32) <- (3x16x24x144x32xf32, 1xi64, 1xi64) + slice_58 = paddle._C_ops.slice( + transpose_104, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (16x24x144x32xf32) <- (3x16x24x144x32xf32, 1xi64, 1xi64) + slice_59 = paddle._C_ops.slice( + transpose_104, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (16x24x144x32xf32) <- (3x16x24x144x32xf32, 1xi64, 1xi64) + slice_17 = paddle._C_ops.slice( + transpose_104, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (16x24x144x32xf32) <- (16x24x144x32xf32, 1xf32) + scale_17 = paddle._C_ops.scale(slice_58, full_0, float("0"), True) + del slice_58 + + # pd_op.transpose: (16x24x32x144xf32) <- (16x24x144x32xf32) + transpose_105 = paddle._C_ops.transpose(slice_59, [0, 1, 3, 2]) + del slice_59 + + # pd_op.matmul: (16x24x144x144xf32) <- (16x24x144x32xf32, 16x24x32x144xf32) + matmul_88 = paddle._C_ops.matmul(scale_17, transpose_105, False, False) + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_152 = paddle._C_ops.reshape(data_7, full_int_array_15) + del data_7 + + # pd_op.index_select: (20736x24xf32) <- (529x24xf32, 20736xi64) + index_select_17 = paddle._C_ops.index_select(data_8, reshape_152, 0) + del data_8 + + # pd_op.reshape: (144x144x24xf32) <- (20736x24xf32, 3xi64) + reshape_318 = paddle._C_ops.reshape(index_select_17, full_int_array_16) + + # pd_op.transpose: (24x144x144xf32) <- (144x144x24xf32) + transpose_106 = paddle._C_ops.transpose(reshape_318, [2, 0, 1]) + del reshape_318 + + # pd_op.unsqueeze: (1x24x144x144xf32) <- (24x144x144xf32, 1xi64) + unsqueeze_25 = paddle._C_ops.unsqueeze(transpose_106, full_int_array_0) + + # pd_op.add: (16x24x144x144xf32) <- (16x24x144x144xf32, 1x24x144x144xf32) + add_120 = paddle._C_ops.add(matmul_88, unsqueeze_25) + + # pd_op.reshape: (4x4x24x144x144xf32) <- (16x24x144x144xf32, 5xi64) + reshape_153 = paddle._C_ops.reshape(add_120, full_int_array_65) + + # pd_op.unsqueeze: (4x1x144x144xf32) <- (4x144x144xf32, 1xi64) + unsqueeze_63 = paddle._C_ops.unsqueeze(where_17, full_int_array_1) + del where_17 + + # pd_op.unsqueeze: (1x4x1x144x144xf32) <- (4x1x144x144xf32, 1xi64) + unsqueeze_26 = paddle._C_ops.unsqueeze(unsqueeze_63, full_int_array_0) + del unsqueeze_63 + + # pd_op.add: (4x4x24x144x144xf32) <- (4x4x24x144x144xf32, 1x4x1x144x144xf32) + add_121 = paddle._C_ops.add(reshape_153, unsqueeze_26) + + # pd_op.reshape: (16x24x144x144xf32) <- (4x4x24x144x144xf32, 4xi64) + reshape_319 = paddle._C_ops.reshape(add_121, full_int_array_66) + + # pd_op.softmax: (16x24x144x144xf32) <- (16x24x144x144xf32) + softmax_17 = paddle._C_ops.softmax(reshape_319, -1) + del reshape_319 + + # pd_op.matmul: (16x24x144x32xf32) <- (16x24x144x144xf32, 16x24x144x32xf32) + matmul_141 = paddle._C_ops.matmul(softmax_17, slice_17, False, False) + + # pd_op.transpose: (16x144x24x32xf32) <- (16x24x144x32xf32) + transpose_107 = paddle._C_ops.transpose(matmul_141, [0, 2, 1, 3]) + del matmul_141 + + # pd_op.reshape: (16x144x768xf32) <- (16x144x24x32xf32, 3xi64) + reshape_154 = paddle._C_ops.reshape(transpose_107, full_int_array_60) + + # pd_op.matmul: (16x144x768xf32) <- (16x144x768xf32, 768x768xf32) + matmul_89 = paddle._C_ops.matmul(reshape_154, parameter_86, False, False) + del parameter_86 + + # pd_op.add: (16x144x768xf32) <- (16x144x768xf32, 768xf32) + add_122 = paddle._C_ops.add(matmul_89, parameter_85) + del parameter_85 + + # pd_op.reshape: (16x12x12x768xf32) <- (16x144x768xf32, 4xi64) + reshape_155 = paddle._C_ops.reshape(add_122, full_int_array_57) + + # pd_op.reshape: (4x2x2x12x12x768xf32) <- (16x12x12x768xf32, 6xi64) + reshape_320 = paddle._C_ops.reshape(reshape_155, full_int_array_61) + + # pd_op.transpose: (4x2x12x2x12x768xf32) <- (4x2x2x12x12x768xf32) + transpose_108 = paddle._C_ops.transpose(reshape_320, [0, 1, 3, 2, 4, 5]) + del reshape_320 + + # pd_op.reshape: (4x24x24x768xf32) <- (4x2x12x2x12x768xf32, 4xi64) + reshape_156 = paddle._C_ops.reshape(transpose_108, full_int_array_62) + + # pd_op.roll: (4x24x24x768xf32) <- (4x24x24x768xf32, 2xi64) + roll_17 = paddle._C_ops.roll(reshape_156, full_int_array_5, [1, 2]) + + # pd_op.reshape: (4x576x768xf32) <- (4x24x24x768xf32, 3xi64) + reshape_157 = paddle._C_ops.reshape(roll_17, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_18 = paddle._C_ops.full( + [], + float("0.926087"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_193 = full_18 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_32 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_211 = paddle._C_ops.add(full_18, uniform_32) + del uniform_32 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_32 = paddle._C_ops.floor(add_211) + del add_211 + + # pd_op.divide: (4x576x768xf32) <- (4x576x768xf32, xf32) + divide_32 = paddle._C_ops.divide(reshape_157, full_18) + + # pd_op.multiply: (4x576x768xf32) <- (4x576x768xf32, 4x1x1xf32) + multiply_32 = paddle._C_ops.multiply(divide_32, floor_32) + + # pd_op.add: (4x576x768xf32) <- (4x576x768xf32, 4x576x768xf32) + add_123 = paddle._C_ops.add(add_118, multiply_32) + + # pd_op.layer_norm: (4x576x768xf32, 4x576xf32, 4x576xf32) <- (4x576x768xf32, 768xf32, 768xf32) + layer_norm_114, layer_norm_115, layer_norm_116 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_123, parameter_84, parameter_83, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_83, parameter_84 + + # pd_op.matmul: (4x576x3072xf32) <- (4x576x768xf32, 768x3072xf32) + matmul_90 = paddle._C_ops.matmul(layer_norm_114, parameter_82, False, False) + del parameter_82 + + # pd_op.add: (4x576x3072xf32) <- (4x576x3072xf32, 3072xf32) + add_124 = paddle._C_ops.add(matmul_90, parameter_81) + del parameter_81 + + # pd_op.gelu: (4x576x3072xf32) <- (4x576x3072xf32) + gelu_17 = paddle._C_ops.gelu(add_124, False) + + # pd_op.matmul: (4x576x768xf32) <- (4x576x3072xf32, 3072x768xf32) + matmul_91 = paddle._C_ops.matmul(gelu_17, parameter_80, False, False) + del parameter_80 + + # pd_op.add: (4x576x768xf32) <- (4x576x768xf32, 768xf32) + add_125 = paddle._C_ops.add(matmul_91, parameter_79) + del parameter_79 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_33 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_212 = paddle._C_ops.add(full_18, uniform_33) + del uniform_33 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_33 = paddle._C_ops.floor(add_212) + del add_212 + + # pd_op.divide: (4x576x768xf32) <- (4x576x768xf32, xf32) + divide_33 = paddle._C_ops.divide(add_125, full_18) + + # pd_op.multiply: (4x576x768xf32) <- (4x576x768xf32, 4x1x1xf32) + multiply_33 = paddle._C_ops.multiply(divide_33, floor_33) + + # pd_op.add: (4x576x768xf32) <- (4x576x768xf32, 4x576x768xf32) + add_126 = paddle._C_ops.add(add_123, multiply_33) + + # pd_op.layer_norm: (4x576x768xf32, 4x576xf32, 4x576xf32) <- (4x576x768xf32, 768xf32, 768xf32) + layer_norm_117, layer_norm_118, layer_norm_119 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_126, parameter_78, parameter_77, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_77, parameter_78 + + # pd_op.reshape: (4x24x24x768xf32) <- (4x576x768xf32, 4xi64) + reshape_158 = paddle._C_ops.reshape(layer_norm_117, full_int_array_55) + + # pd_op.reshape: (4x2x12x2x12x768xf32) <- (4x24x24x768xf32, 6xi64) + reshape_321 = paddle._C_ops.reshape(reshape_158, full_int_array_56) + + # pd_op.transpose: (4x2x2x12x12x768xf32) <- (4x2x12x2x12x768xf32) + transpose_109 = paddle._C_ops.transpose(reshape_321, [0, 1, 3, 2, 4, 5]) + del reshape_321 + + # pd_op.reshape: (16x12x12x768xf32) <- (4x2x2x12x12x768xf32, 4xi64) + reshape_159 = paddle._C_ops.reshape(transpose_109, full_int_array_57) + + # pd_op.reshape: (16x144x768xf32) <- (16x12x12x768xf32, 3xi64) + reshape_160 = paddle._C_ops.reshape(reshape_159, full_int_array_58) + + # pd_op.matmul: (16x144x2304xf32) <- (16x144x768xf32, 768x2304xf32) + matmul_92 = paddle._C_ops.matmul(reshape_160, parameter_76, False, False) + del parameter_76 + + # pd_op.add: (16x144x2304xf32) <- (16x144x2304xf32, 2304xf32) + add_127 = paddle._C_ops.add(matmul_92, parameter_75) + del parameter_75 + + # pd_op.reshape: (16x144x3x24x32xf32) <- (16x144x2304xf32, 5xi64) + reshape_322 = paddle._C_ops.reshape(add_127, full_int_array_59) + + # pd_op.transpose: (3x16x24x144x32xf32) <- (16x144x3x24x32xf32) + transpose_110 = paddle._C_ops.transpose(reshape_322, [2, 0, 3, 1, 4]) + del reshape_322 + + # pd_op.slice: (16x24x144x32xf32) <- (3x16x24x144x32xf32, 1xi64, 1xi64) + slice_60 = paddle._C_ops.slice( + transpose_110, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (16x24x144x32xf32) <- (3x16x24x144x32xf32, 1xi64, 1xi64) + slice_61 = paddle._C_ops.slice( + transpose_110, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (16x24x144x32xf32) <- (3x16x24x144x32xf32, 1xi64, 1xi64) + slice_18 = paddle._C_ops.slice( + transpose_110, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (16x24x144x32xf32) <- (16x24x144x32xf32, 1xf32) + scale_18 = paddle._C_ops.scale(slice_60, full_0, float("0"), True) + del slice_60 + + # pd_op.transpose: (16x24x32x144xf32) <- (16x24x144x32xf32) + transpose_111 = paddle._C_ops.transpose(slice_61, [0, 1, 3, 2]) + del slice_61 + + # pd_op.matmul: (16x24x144x144xf32) <- (16x24x144x32xf32, 16x24x32x144xf32) + matmul_93 = paddle._C_ops.matmul(scale_18, transpose_111, False, False) + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_161 = paddle._C_ops.reshape(data_9, full_int_array_15) + del data_9 + + # pd_op.index_select: (20736x24xf32) <- (529x24xf32, 20736xi64) + index_select_18 = paddle._C_ops.index_select(data_10, reshape_161, 0) + del data_10 + + # pd_op.reshape: (144x144x24xf32) <- (20736x24xf32, 3xi64) + reshape_323 = paddle._C_ops.reshape(index_select_18, full_int_array_16) + + # pd_op.transpose: (24x144x144xf32) <- (144x144x24xf32) + transpose_112 = paddle._C_ops.transpose(reshape_323, [2, 0, 1]) + del reshape_323 + + # pd_op.unsqueeze: (1x24x144x144xf32) <- (24x144x144xf32, 1xi64) + unsqueeze_27 = paddle._C_ops.unsqueeze(transpose_112, full_int_array_0) + + # pd_op.add: (16x24x144x144xf32) <- (16x24x144x144xf32, 1x24x144x144xf32) + add_213 = paddle._C_ops.add(matmul_93, unsqueeze_27) + + # pd_op.softmax: (16x24x144x144xf32) <- (16x24x144x144xf32) + softmax_18 = paddle._C_ops.softmax(add_213, -1) + del add_213 + + # pd_op.matmul: (16x24x144x32xf32) <- (16x24x144x144xf32, 16x24x144x32xf32) + matmul_142 = paddle._C_ops.matmul(softmax_18, slice_18, False, False) + + # pd_op.transpose: (16x144x24x32xf32) <- (16x24x144x32xf32) + transpose_113 = paddle._C_ops.transpose(matmul_142, [0, 2, 1, 3]) + del matmul_142 + + # pd_op.reshape: (16x144x768xf32) <- (16x144x24x32xf32, 3xi64) + reshape_162 = paddle._C_ops.reshape(transpose_113, full_int_array_60) + + # pd_op.matmul: (16x144x768xf32) <- (16x144x768xf32, 768x768xf32) + matmul_94 = paddle._C_ops.matmul(reshape_162, parameter_74, False, False) + del parameter_74 + + # pd_op.add: (16x144x768xf32) <- (16x144x768xf32, 768xf32) + add_128 = paddle._C_ops.add(matmul_94, parameter_73) + del parameter_73 + + # pd_op.reshape: (16x12x12x768xf32) <- (16x144x768xf32, 4xi64) + reshape_163 = paddle._C_ops.reshape(add_128, full_int_array_57) + + # pd_op.reshape: (4x2x2x12x12x768xf32) <- (16x12x12x768xf32, 6xi64) + reshape_324 = paddle._C_ops.reshape(reshape_163, full_int_array_61) + + # pd_op.transpose: (4x2x12x2x12x768xf32) <- (4x2x2x12x12x768xf32) + transpose_114 = paddle._C_ops.transpose(reshape_324, [0, 1, 3, 2, 4, 5]) + del reshape_324 + + # pd_op.reshape: (4x24x24x768xf32) <- (4x2x12x2x12x768xf32, 4xi64) + reshape_164 = paddle._C_ops.reshape(transpose_114, full_int_array_62) + + # pd_op.reshape: (4x576x768xf32) <- (4x24x24x768xf32, 3xi64) + reshape_165 = paddle._C_ops.reshape(reshape_164, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_19 = paddle._C_ops.full( + [], + float("0.921739"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_202 = full_19 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_34 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_214 = paddle._C_ops.add(full_19, uniform_34) + del uniform_34 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_34 = paddle._C_ops.floor(add_214) + del add_214 + + # pd_op.divide: (4x576x768xf32) <- (4x576x768xf32, xf32) + divide_34 = paddle._C_ops.divide(reshape_165, full_19) + + # pd_op.multiply: (4x576x768xf32) <- (4x576x768xf32, 4x1x1xf32) + multiply_34 = paddle._C_ops.multiply(divide_34, floor_34) + + # pd_op.add: (4x576x768xf32) <- (4x576x768xf32, 4x576x768xf32) + add_129 = paddle._C_ops.add(add_126, multiply_34) + + # pd_op.layer_norm: (4x576x768xf32, 4x576xf32, 4x576xf32) <- (4x576x768xf32, 768xf32, 768xf32) + layer_norm_120, layer_norm_121, layer_norm_122 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_129, parameter_72, parameter_71, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_71, parameter_72 + + # pd_op.matmul: (4x576x3072xf32) <- (4x576x768xf32, 768x3072xf32) + matmul_95 = paddle._C_ops.matmul(layer_norm_120, parameter_70, False, False) + del parameter_70 + + # pd_op.add: (4x576x3072xf32) <- (4x576x3072xf32, 3072xf32) + add_130 = paddle._C_ops.add(matmul_95, parameter_69) + del parameter_69 + + # pd_op.gelu: (4x576x3072xf32) <- (4x576x3072xf32) + gelu_18 = paddle._C_ops.gelu(add_130, False) + + # pd_op.matmul: (4x576x768xf32) <- (4x576x3072xf32, 3072x768xf32) + matmul_96 = paddle._C_ops.matmul(gelu_18, parameter_68, False, False) + del parameter_68 + + # pd_op.add: (4x576x768xf32) <- (4x576x768xf32, 768xf32) + add_131 = paddle._C_ops.add(matmul_96, parameter_67) + del parameter_67 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_35 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_215 = paddle._C_ops.add(full_19, uniform_35) + del uniform_35 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_35 = paddle._C_ops.floor(add_215) + del add_215 + + # pd_op.divide: (4x576x768xf32) <- (4x576x768xf32, xf32) + divide_35 = paddle._C_ops.divide(add_131, full_19) + + # pd_op.multiply: (4x576x768xf32) <- (4x576x768xf32, 4x1x1xf32) + multiply_35 = paddle._C_ops.multiply(divide_35, floor_35) + + # pd_op.add: (4x576x768xf32) <- (4x576x768xf32, 4x576x768xf32) + add_132 = paddle._C_ops.add(add_129, multiply_35) + + # pd_op.layer_norm: (4x576x768xf32, 4x576xf32, 4x576xf32) <- (4x576x768xf32, 768xf32, 768xf32) + layer_norm_123, layer_norm_124, layer_norm_125 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_132, parameter_66, parameter_65, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_65, parameter_66 + + # pd_op.reshape: (4x24x24x768xf32) <- (4x576x768xf32, 4xi64) + reshape_166 = paddle._C_ops.reshape(layer_norm_123, full_int_array_55) + + # pd_op.roll: (4x24x24x768xf32) <- (4x24x24x768xf32, 2xi64) + roll_18 = paddle._C_ops.roll(reshape_166, full_int_array_4, [1, 2]) + + # pd_op.reshape: (4x2x12x2x12x768xf32) <- (4x24x24x768xf32, 6xi64) + reshape_325 = paddle._C_ops.reshape(roll_18, full_int_array_56) + + # pd_op.transpose: (4x2x2x12x12x768xf32) <- (4x2x12x2x12x768xf32) + transpose_115 = paddle._C_ops.transpose(reshape_325, [0, 1, 3, 2, 4, 5]) + del reshape_325 + + # pd_op.reshape: (16x12x12x768xf32) <- (4x2x2x12x12x768xf32, 4xi64) + reshape_167 = paddle._C_ops.reshape(transpose_115, full_int_array_57) + + # pd_op.reshape: (16x144x768xf32) <- (16x12x12x768xf32, 3xi64) + reshape_168 = paddle._C_ops.reshape(reshape_167, full_int_array_58) + + # pd_op.full: (1x24x24x1xf32) <- () + full_43 = paddle._C_ops.full( + [1, 24, 24, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__84 = paddle._C_ops.set_value_( + full_43, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_43 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__85 = paddle._C_ops.set_value_( + set_value__84, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__84 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__86 = paddle._C_ops.set_value_( + set_value__85, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__85 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__87 = paddle._C_ops.set_value_( + set_value__86, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__86 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__88 = paddle._C_ops.set_value_( + set_value__87, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__87 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__89 = paddle._C_ops.set_value_( + set_value__88, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__88 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__90 = paddle._C_ops.set_value_( + set_value__89, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__89 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__91 = paddle._C_ops.set_value_( + set_value__90, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__90 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__9 = paddle._C_ops.set_value_( + set_value__91, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__91 + + # pd_op.reshape: (1x2x12x2x12x1xf32) <- (1x24x24x1xf32, 6xi64) + reshape_326 = paddle._C_ops.reshape(set_value__9, full_int_array_64) + + # pd_op.transpose: (1x2x2x12x12x1xf32) <- (1x2x12x2x12x1xf32) + transpose_155 = paddle._C_ops.transpose(reshape_326, [0, 1, 3, 2, 4, 5]) + del reshape_326 + + # pd_op.reshape: (4x12x12x1xf32) <- (1x2x2x12x12x1xf32, 4xi64) + reshape_327 = paddle._C_ops.reshape(transpose_155, full_int_array_36) + del transpose_155 + + # pd_op.reshape: (4x144xf32) <- (4x12x12x1xf32, 2xi64) + reshape_328 = paddle._C_ops.reshape(reshape_327, full_int_array_37) + del reshape_327 + + # pd_op.unsqueeze: (4x1x144xf32) <- (4x144xf32, 1xi64) + unsqueeze_64 = paddle._C_ops.unsqueeze(reshape_328, full_int_array_1) + + # pd_op.unsqueeze: (4x144x1xf32) <- (4x144xf32, 1xi64) + unsqueeze_65 = paddle._C_ops.unsqueeze(reshape_328, full_int_array_2) + del reshape_328 + + # pd_op.subtract: (4x144x144xf32) <- (4x1x144xf32, 4x144x1xf32) + subtract_9 = paddle._C_ops.subtract(unsqueeze_64, unsqueeze_65) + del unsqueeze_64, unsqueeze_65 + + # pd_op.not_equal: (4x144x144xb) <- (4x144x144xf32, xf32) + not_equal_9 = paddle._C_ops.not_equal(subtract_9, full_26) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_18 = paddle._C_ops.where(not_equal_9, full_35, subtract_9) + del not_equal_9, subtract_9 + + # pd_op.equal: (4x144x144xb) <- (4x144x144xf32, xf32) + equal_9 = paddle._C_ops.equal(where_18, full_26) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_19 = paddle._C_ops.where(equal_9, full_36, where_18) + del equal_9, where_18 + + # pd_op.matmul: (16x144x2304xf32) <- (16x144x768xf32, 768x2304xf32) + matmul_97 = paddle._C_ops.matmul(reshape_168, parameter_64, False, False) + del parameter_64 + + # pd_op.add: (16x144x2304xf32) <- (16x144x2304xf32, 2304xf32) + add_133 = paddle._C_ops.add(matmul_97, parameter_63) + del parameter_63 + + # pd_op.reshape: (16x144x3x24x32xf32) <- (16x144x2304xf32, 5xi64) + reshape_329 = paddle._C_ops.reshape(add_133, full_int_array_59) + + # pd_op.transpose: (3x16x24x144x32xf32) <- (16x144x3x24x32xf32) + transpose_116 = paddle._C_ops.transpose(reshape_329, [2, 0, 3, 1, 4]) + del reshape_329 + + # pd_op.slice: (16x24x144x32xf32) <- (3x16x24x144x32xf32, 1xi64, 1xi64) + slice_62 = paddle._C_ops.slice( + transpose_116, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (16x24x144x32xf32) <- (3x16x24x144x32xf32, 1xi64, 1xi64) + slice_63 = paddle._C_ops.slice( + transpose_116, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (16x24x144x32xf32) <- (3x16x24x144x32xf32, 1xi64, 1xi64) + slice_19 = paddle._C_ops.slice( + transpose_116, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (16x24x144x32xf32) <- (16x24x144x32xf32, 1xf32) + scale_19 = paddle._C_ops.scale(slice_62, full_0, float("0"), True) + del slice_62 + + # pd_op.transpose: (16x24x32x144xf32) <- (16x24x144x32xf32) + transpose_117 = paddle._C_ops.transpose(slice_63, [0, 1, 3, 2]) + del slice_63 + + # pd_op.matmul: (16x24x144x144xf32) <- (16x24x144x32xf32, 16x24x32x144xf32) + matmul_98 = paddle._C_ops.matmul(scale_19, transpose_117, False, False) + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_169 = paddle._C_ops.reshape(data_11, full_int_array_15) + del data_11 + + # pd_op.index_select: (20736x24xf32) <- (529x24xf32, 20736xi64) + index_select_19 = paddle._C_ops.index_select(data_12, reshape_169, 0) + del data_12 + + # pd_op.reshape: (144x144x24xf32) <- (20736x24xf32, 3xi64) + reshape_330 = paddle._C_ops.reshape(index_select_19, full_int_array_16) + + # pd_op.transpose: (24x144x144xf32) <- (144x144x24xf32) + transpose_118 = paddle._C_ops.transpose(reshape_330, [2, 0, 1]) + del reshape_330 + + # pd_op.unsqueeze: (1x24x144x144xf32) <- (24x144x144xf32, 1xi64) + unsqueeze_28 = paddle._C_ops.unsqueeze(transpose_118, full_int_array_0) + + # pd_op.add: (16x24x144x144xf32) <- (16x24x144x144xf32, 1x24x144x144xf32) + add_134 = paddle._C_ops.add(matmul_98, unsqueeze_28) + + # pd_op.reshape: (4x4x24x144x144xf32) <- (16x24x144x144xf32, 5xi64) + reshape_170 = paddle._C_ops.reshape(add_134, full_int_array_65) + + # pd_op.unsqueeze: (4x1x144x144xf32) <- (4x144x144xf32, 1xi64) + unsqueeze_66 = paddle._C_ops.unsqueeze(where_19, full_int_array_1) + del where_19 + + # pd_op.unsqueeze: (1x4x1x144x144xf32) <- (4x1x144x144xf32, 1xi64) + unsqueeze_29 = paddle._C_ops.unsqueeze(unsqueeze_66, full_int_array_0) + del unsqueeze_66 + + # pd_op.add: (4x4x24x144x144xf32) <- (4x4x24x144x144xf32, 1x4x1x144x144xf32) + add_135 = paddle._C_ops.add(reshape_170, unsqueeze_29) + + # pd_op.reshape: (16x24x144x144xf32) <- (4x4x24x144x144xf32, 4xi64) + reshape_331 = paddle._C_ops.reshape(add_135, full_int_array_66) + + # pd_op.softmax: (16x24x144x144xf32) <- (16x24x144x144xf32) + softmax_19 = paddle._C_ops.softmax(reshape_331, -1) + del reshape_331 + + # pd_op.matmul: (16x24x144x32xf32) <- (16x24x144x144xf32, 16x24x144x32xf32) + matmul_143 = paddle._C_ops.matmul(softmax_19, slice_19, False, False) + + # pd_op.transpose: (16x144x24x32xf32) <- (16x24x144x32xf32) + transpose_119 = paddle._C_ops.transpose(matmul_143, [0, 2, 1, 3]) + del matmul_143 + + # pd_op.reshape: (16x144x768xf32) <- (16x144x24x32xf32, 3xi64) + reshape_171 = paddle._C_ops.reshape(transpose_119, full_int_array_60) + + # pd_op.matmul: (16x144x768xf32) <- (16x144x768xf32, 768x768xf32) + matmul_99 = paddle._C_ops.matmul(reshape_171, parameter_62, False, False) + del parameter_62 + + # pd_op.add: (16x144x768xf32) <- (16x144x768xf32, 768xf32) + add_136 = paddle._C_ops.add(matmul_99, parameter_61) + del parameter_61 + + # pd_op.reshape: (16x12x12x768xf32) <- (16x144x768xf32, 4xi64) + reshape_172 = paddle._C_ops.reshape(add_136, full_int_array_57) + + # pd_op.reshape: (4x2x2x12x12x768xf32) <- (16x12x12x768xf32, 6xi64) + reshape_332 = paddle._C_ops.reshape(reshape_172, full_int_array_61) + + # pd_op.transpose: (4x2x12x2x12x768xf32) <- (4x2x2x12x12x768xf32) + transpose_120 = paddle._C_ops.transpose(reshape_332, [0, 1, 3, 2, 4, 5]) + del reshape_332 + + # pd_op.reshape: (4x24x24x768xf32) <- (4x2x12x2x12x768xf32, 4xi64) + reshape_173 = paddle._C_ops.reshape(transpose_120, full_int_array_62) + + # pd_op.roll: (4x24x24x768xf32) <- (4x24x24x768xf32, 2xi64) + roll_19 = paddle._C_ops.roll(reshape_173, full_int_array_5, [1, 2]) + + # pd_op.reshape: (4x576x768xf32) <- (4x24x24x768xf32, 3xi64) + reshape_174 = paddle._C_ops.reshape(roll_19, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_20 = paddle._C_ops.full( + [], + float("0.917391"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_213 = full_20 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_36 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_216 = paddle._C_ops.add(full_20, uniform_36) + del uniform_36 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_36 = paddle._C_ops.floor(add_216) + del add_216 + + # pd_op.divide: (4x576x768xf32) <- (4x576x768xf32, xf32) + divide_36 = paddle._C_ops.divide(reshape_174, full_20) + + # pd_op.multiply: (4x576x768xf32) <- (4x576x768xf32, 4x1x1xf32) + multiply_36 = paddle._C_ops.multiply(divide_36, floor_36) + + # pd_op.add: (4x576x768xf32) <- (4x576x768xf32, 4x576x768xf32) + add_137 = paddle._C_ops.add(add_132, multiply_36) + + # pd_op.layer_norm: (4x576x768xf32, 4x576xf32, 4x576xf32) <- (4x576x768xf32, 768xf32, 768xf32) + layer_norm_126, layer_norm_127, layer_norm_128 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_137, parameter_60, parameter_59, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_59, parameter_60 + + # pd_op.matmul: (4x576x3072xf32) <- (4x576x768xf32, 768x3072xf32) + matmul_100 = paddle._C_ops.matmul(layer_norm_126, parameter_58, False, False) + del parameter_58 + + # pd_op.add: (4x576x3072xf32) <- (4x576x3072xf32, 3072xf32) + add_138 = paddle._C_ops.add(matmul_100, parameter_57) + del parameter_57 + + # pd_op.gelu: (4x576x3072xf32) <- (4x576x3072xf32) + gelu_19 = paddle._C_ops.gelu(add_138, False) + + # pd_op.matmul: (4x576x768xf32) <- (4x576x3072xf32, 3072x768xf32) + matmul_101 = paddle._C_ops.matmul(gelu_19, parameter_56, False, False) + del parameter_56 + + # pd_op.add: (4x576x768xf32) <- (4x576x768xf32, 768xf32) + add_139 = paddle._C_ops.add(matmul_101, parameter_55) + del parameter_55 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_37 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_217 = paddle._C_ops.add(full_20, uniform_37) + del uniform_37 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_37 = paddle._C_ops.floor(add_217) + del add_217 + + # pd_op.divide: (4x576x768xf32) <- (4x576x768xf32, xf32) + divide_37 = paddle._C_ops.divide(add_139, full_20) + + # pd_op.multiply: (4x576x768xf32) <- (4x576x768xf32, 4x1x1xf32) + multiply_37 = paddle._C_ops.multiply(divide_37, floor_37) + + # pd_op.add: (4x576x768xf32) <- (4x576x768xf32, 4x576x768xf32) + add_140 = paddle._C_ops.add(add_137, multiply_37) + + # pd_op.layer_norm: (4x576x768xf32, 4x576xf32, 4x576xf32) <- (4x576x768xf32, 768xf32, 768xf32) + layer_norm_129, layer_norm_130, layer_norm_131 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_140, parameter_54, parameter_53, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_53, parameter_54 + + # pd_op.reshape: (4x24x24x768xf32) <- (4x576x768xf32, 4xi64) + reshape_175 = paddle._C_ops.reshape(layer_norm_129, full_int_array_55) + + # pd_op.reshape: (4x2x12x2x12x768xf32) <- (4x24x24x768xf32, 6xi64) + reshape_333 = paddle._C_ops.reshape(reshape_175, full_int_array_56) + + # pd_op.transpose: (4x2x2x12x12x768xf32) <- (4x2x12x2x12x768xf32) + transpose_121 = paddle._C_ops.transpose(reshape_333, [0, 1, 3, 2, 4, 5]) + del reshape_333 + + # pd_op.reshape: (16x12x12x768xf32) <- (4x2x2x12x12x768xf32, 4xi64) + reshape_176 = paddle._C_ops.reshape(transpose_121, full_int_array_57) + + # pd_op.reshape: (16x144x768xf32) <- (16x12x12x768xf32, 3xi64) + reshape_177 = paddle._C_ops.reshape(reshape_176, full_int_array_58) + + # pd_op.matmul: (16x144x2304xf32) <- (16x144x768xf32, 768x2304xf32) + matmul_102 = paddle._C_ops.matmul(reshape_177, parameter_52, False, False) + del parameter_52 + + # pd_op.add: (16x144x2304xf32) <- (16x144x2304xf32, 2304xf32) + add_141 = paddle._C_ops.add(matmul_102, parameter_51) + del parameter_51 + + # pd_op.reshape: (16x144x3x24x32xf32) <- (16x144x2304xf32, 5xi64) + reshape_334 = paddle._C_ops.reshape(add_141, full_int_array_59) + + # pd_op.transpose: (3x16x24x144x32xf32) <- (16x144x3x24x32xf32) + transpose_122 = paddle._C_ops.transpose(reshape_334, [2, 0, 3, 1, 4]) + del reshape_334 + + # pd_op.slice: (16x24x144x32xf32) <- (3x16x24x144x32xf32, 1xi64, 1xi64) + slice_64 = paddle._C_ops.slice( + transpose_122, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (16x24x144x32xf32) <- (3x16x24x144x32xf32, 1xi64, 1xi64) + slice_65 = paddle._C_ops.slice( + transpose_122, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (16x24x144x32xf32) <- (3x16x24x144x32xf32, 1xi64, 1xi64) + slice_20 = paddle._C_ops.slice( + transpose_122, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (16x24x144x32xf32) <- (16x24x144x32xf32, 1xf32) + scale_20 = paddle._C_ops.scale(slice_64, full_0, float("0"), True) + del slice_64 + + # pd_op.transpose: (16x24x32x144xf32) <- (16x24x144x32xf32) + transpose_123 = paddle._C_ops.transpose(slice_65, [0, 1, 3, 2]) + del slice_65 + + # pd_op.matmul: (16x24x144x144xf32) <- (16x24x144x32xf32, 16x24x32x144xf32) + matmul_103 = paddle._C_ops.matmul(scale_20, transpose_123, False, False) + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_178 = paddle._C_ops.reshape(data_13, full_int_array_15) + del data_13 + + # pd_op.index_select: (20736x24xf32) <- (529x24xf32, 20736xi64) + index_select_20 = paddle._C_ops.index_select(data_14, reshape_178, 0) + del data_14 + + # pd_op.reshape: (144x144x24xf32) <- (20736x24xf32, 3xi64) + reshape_335 = paddle._C_ops.reshape(index_select_20, full_int_array_16) + + # pd_op.transpose: (24x144x144xf32) <- (144x144x24xf32) + transpose_124 = paddle._C_ops.transpose(reshape_335, [2, 0, 1]) + del reshape_335 + + # pd_op.unsqueeze: (1x24x144x144xf32) <- (24x144x144xf32, 1xi64) + unsqueeze_30 = paddle._C_ops.unsqueeze(transpose_124, full_int_array_0) + + # pd_op.add: (16x24x144x144xf32) <- (16x24x144x144xf32, 1x24x144x144xf32) + add_218 = paddle._C_ops.add(matmul_103, unsqueeze_30) + + # pd_op.softmax: (16x24x144x144xf32) <- (16x24x144x144xf32) + softmax_20 = paddle._C_ops.softmax(add_218, -1) + del add_218 + + # pd_op.matmul: (16x24x144x32xf32) <- (16x24x144x144xf32, 16x24x144x32xf32) + matmul_144 = paddle._C_ops.matmul(softmax_20, slice_20, False, False) + + # pd_op.transpose: (16x144x24x32xf32) <- (16x24x144x32xf32) + transpose_125 = paddle._C_ops.transpose(matmul_144, [0, 2, 1, 3]) + del matmul_144 + + # pd_op.reshape: (16x144x768xf32) <- (16x144x24x32xf32, 3xi64) + reshape_179 = paddle._C_ops.reshape(transpose_125, full_int_array_60) + + # pd_op.matmul: (16x144x768xf32) <- (16x144x768xf32, 768x768xf32) + matmul_104 = paddle._C_ops.matmul(reshape_179, parameter_50, False, False) + del parameter_50 + + # pd_op.add: (16x144x768xf32) <- (16x144x768xf32, 768xf32) + add_142 = paddle._C_ops.add(matmul_104, parameter_49) + del parameter_49 + + # pd_op.reshape: (16x12x12x768xf32) <- (16x144x768xf32, 4xi64) + reshape_180 = paddle._C_ops.reshape(add_142, full_int_array_57) + + # pd_op.reshape: (4x2x2x12x12x768xf32) <- (16x12x12x768xf32, 6xi64) + reshape_336 = paddle._C_ops.reshape(reshape_180, full_int_array_61) + + # pd_op.transpose: (4x2x12x2x12x768xf32) <- (4x2x2x12x12x768xf32) + transpose_126 = paddle._C_ops.transpose(reshape_336, [0, 1, 3, 2, 4, 5]) + del reshape_336 + + # pd_op.reshape: (4x24x24x768xf32) <- (4x2x12x2x12x768xf32, 4xi64) + reshape_181 = paddle._C_ops.reshape(transpose_126, full_int_array_62) + + # pd_op.reshape: (4x576x768xf32) <- (4x24x24x768xf32, 3xi64) + reshape_182 = paddle._C_ops.reshape(reshape_181, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_21 = paddle._C_ops.full( + [], + float("0.913043"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_222 = full_21 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_38 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_219 = paddle._C_ops.add(full_21, uniform_38) + del uniform_38 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_38 = paddle._C_ops.floor(add_219) + del add_219 + + # pd_op.divide: (4x576x768xf32) <- (4x576x768xf32, xf32) + divide_38 = paddle._C_ops.divide(reshape_182, full_21) + + # pd_op.multiply: (4x576x768xf32) <- (4x576x768xf32, 4x1x1xf32) + multiply_38 = paddle._C_ops.multiply(divide_38, floor_38) + + # pd_op.add: (4x576x768xf32) <- (4x576x768xf32, 4x576x768xf32) + add_143 = paddle._C_ops.add(add_140, multiply_38) + + # pd_op.layer_norm: (4x576x768xf32, 4x576xf32, 4x576xf32) <- (4x576x768xf32, 768xf32, 768xf32) + layer_norm_132, layer_norm_133, layer_norm_134 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_143, parameter_48, parameter_47, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_47, parameter_48 + + # pd_op.matmul: (4x576x3072xf32) <- (4x576x768xf32, 768x3072xf32) + matmul_105 = paddle._C_ops.matmul(layer_norm_132, parameter_46, False, False) + del parameter_46 + + # pd_op.add: (4x576x3072xf32) <- (4x576x3072xf32, 3072xf32) + add_144 = paddle._C_ops.add(matmul_105, parameter_45) + del parameter_45 + + # pd_op.gelu: (4x576x3072xf32) <- (4x576x3072xf32) + gelu_20 = paddle._C_ops.gelu(add_144, False) + + # pd_op.matmul: (4x576x768xf32) <- (4x576x3072xf32, 3072x768xf32) + matmul_106 = paddle._C_ops.matmul(gelu_20, parameter_44, False, False) + del parameter_44 + + # pd_op.add: (4x576x768xf32) <- (4x576x768xf32, 768xf32) + add_145 = paddle._C_ops.add(matmul_106, parameter_43) + del parameter_43 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_39 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_220 = paddle._C_ops.add(full_21, uniform_39) + del uniform_39 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_39 = paddle._C_ops.floor(add_220) + del add_220 + + # pd_op.divide: (4x576x768xf32) <- (4x576x768xf32, xf32) + divide_39 = paddle._C_ops.divide(add_145, full_21) + + # pd_op.multiply: (4x576x768xf32) <- (4x576x768xf32, 4x1x1xf32) + multiply_39 = paddle._C_ops.multiply(divide_39, floor_39) + + # pd_op.add: (4x576x768xf32) <- (4x576x768xf32, 4x576x768xf32) + add_146 = paddle._C_ops.add(add_143, multiply_39) + + # pd_op.layer_norm: (4x576x768xf32, 4x576xf32, 4x576xf32) <- (4x576x768xf32, 768xf32, 768xf32) + layer_norm_135, layer_norm_136, layer_norm_137 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_146, parameter_42, parameter_41, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_41, parameter_42 + + # pd_op.reshape: (4x24x24x768xf32) <- (4x576x768xf32, 4xi64) + reshape_183 = paddle._C_ops.reshape(layer_norm_135, full_int_array_55) + + # pd_op.roll: (4x24x24x768xf32) <- (4x24x24x768xf32, 2xi64) + roll_20 = paddle._C_ops.roll(reshape_183, full_int_array_4, [1, 2]) + + # pd_op.reshape: (4x2x12x2x12x768xf32) <- (4x24x24x768xf32, 6xi64) + reshape_337 = paddle._C_ops.reshape(roll_20, full_int_array_56) + del full_int_array_56 + + # pd_op.transpose: (4x2x2x12x12x768xf32) <- (4x2x12x2x12x768xf32) + transpose_127 = paddle._C_ops.transpose(reshape_337, [0, 1, 3, 2, 4, 5]) + del reshape_337 + + # pd_op.reshape: (16x12x12x768xf32) <- (4x2x2x12x12x768xf32, 4xi64) + reshape_184 = paddle._C_ops.reshape(transpose_127, full_int_array_57) + + # pd_op.reshape: (16x144x768xf32) <- (16x12x12x768xf32, 3xi64) + reshape_185 = paddle._C_ops.reshape(reshape_184, full_int_array_58) + del full_int_array_58 + + # pd_op.full: (1x24x24x1xf32) <- () + full_44 = paddle._C_ops.full( + [1, 24, 24, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__92 = paddle._C_ops.set_value_( + full_44, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_44 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__93 = paddle._C_ops.set_value_( + set_value__92, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__92 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__94 = paddle._C_ops.set_value_( + set_value__93, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__93 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__95 = paddle._C_ops.set_value_( + set_value__94, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__94 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__96 = paddle._C_ops.set_value_( + set_value__95, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__95 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__97 = paddle._C_ops.set_value_( + set_value__96, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__96 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__98 = paddle._C_ops.set_value_( + set_value__97, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__97 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__99 = paddle._C_ops.set_value_( + set_value__98, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__98 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__10 = paddle._C_ops.set_value_( + set_value__99, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__99 + + # pd_op.reshape: (1x2x12x2x12x1xf32) <- (1x24x24x1xf32, 6xi64) + reshape_338 = paddle._C_ops.reshape(set_value__10, full_int_array_64) + del full_int_array_64 + + # pd_op.transpose: (1x2x2x12x12x1xf32) <- (1x2x12x2x12x1xf32) + transpose_156 = paddle._C_ops.transpose(reshape_338, [0, 1, 3, 2, 4, 5]) + del reshape_338 + + # pd_op.reshape: (4x12x12x1xf32) <- (1x2x2x12x12x1xf32, 4xi64) + reshape_339 = paddle._C_ops.reshape(transpose_156, full_int_array_36) + del transpose_156 + + # pd_op.reshape: (4x144xf32) <- (4x12x12x1xf32, 2xi64) + reshape_340 = paddle._C_ops.reshape(reshape_339, full_int_array_37) + del reshape_339 + + # pd_op.unsqueeze: (4x1x144xf32) <- (4x144xf32, 1xi64) + unsqueeze_67 = paddle._C_ops.unsqueeze(reshape_340, full_int_array_1) + + # pd_op.unsqueeze: (4x144x1xf32) <- (4x144xf32, 1xi64) + unsqueeze_68 = paddle._C_ops.unsqueeze(reshape_340, full_int_array_2) + del reshape_340 + + # pd_op.subtract: (4x144x144xf32) <- (4x1x144xf32, 4x144x1xf32) + subtract_10 = paddle._C_ops.subtract(unsqueeze_67, unsqueeze_68) + del unsqueeze_67, unsqueeze_68 + + # pd_op.not_equal: (4x144x144xb) <- (4x144x144xf32, xf32) + not_equal_10 = paddle._C_ops.not_equal(subtract_10, full_26) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_20 = paddle._C_ops.where(not_equal_10, full_35, subtract_10) + del full_35, not_equal_10, subtract_10 + + # pd_op.equal: (4x144x144xb) <- (4x144x144xf32, xf32) + equal_10 = paddle._C_ops.equal(where_20, full_26) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_21 = paddle._C_ops.where(equal_10, full_36, where_20) + del equal_10, full_36, where_20 + + # pd_op.matmul: (16x144x2304xf32) <- (16x144x768xf32, 768x2304xf32) + matmul_107 = paddle._C_ops.matmul(reshape_185, parameter_40, False, False) + del parameter_40 + + # pd_op.add: (16x144x2304xf32) <- (16x144x2304xf32, 2304xf32) + add_147 = paddle._C_ops.add(matmul_107, parameter_39) + del parameter_39 + + # pd_op.reshape: (16x144x3x24x32xf32) <- (16x144x2304xf32, 5xi64) + reshape_341 = paddle._C_ops.reshape(add_147, full_int_array_59) + del full_int_array_59 + + # pd_op.transpose: (3x16x24x144x32xf32) <- (16x144x3x24x32xf32) + transpose_128 = paddle._C_ops.transpose(reshape_341, [2, 0, 3, 1, 4]) + del reshape_341 + + # pd_op.slice: (16x24x144x32xf32) <- (3x16x24x144x32xf32, 1xi64, 1xi64) + slice_66 = paddle._C_ops.slice( + transpose_128, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (16x24x144x32xf32) <- (3x16x24x144x32xf32, 1xi64, 1xi64) + slice_67 = paddle._C_ops.slice( + transpose_128, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (16x24x144x32xf32) <- (3x16x24x144x32xf32, 1xi64, 1xi64) + slice_21 = paddle._C_ops.slice( + transpose_128, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (16x24x144x32xf32) <- (16x24x144x32xf32, 1xf32) + scale_21 = paddle._C_ops.scale(slice_66, full_0, float("0"), True) + del slice_66 + + # pd_op.transpose: (16x24x32x144xf32) <- (16x24x144x32xf32) + transpose_129 = paddle._C_ops.transpose(slice_67, [0, 1, 3, 2]) + del slice_67 + + # pd_op.matmul: (16x24x144x144xf32) <- (16x24x144x32xf32, 16x24x32x144xf32) + matmul_108 = paddle._C_ops.matmul(scale_21, transpose_129, False, False) + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_186 = paddle._C_ops.reshape(data_15, full_int_array_15) + del data_15 + + # pd_op.index_select: (20736x24xf32) <- (529x24xf32, 20736xi64) + index_select_21 = paddle._C_ops.index_select(data_16, reshape_186, 0) + del data_16 + + # pd_op.reshape: (144x144x24xf32) <- (20736x24xf32, 3xi64) + reshape_342 = paddle._C_ops.reshape(index_select_21, full_int_array_16) + + # pd_op.transpose: (24x144x144xf32) <- (144x144x24xf32) + transpose_130 = paddle._C_ops.transpose(reshape_342, [2, 0, 1]) + del reshape_342 + + # pd_op.unsqueeze: (1x24x144x144xf32) <- (24x144x144xf32, 1xi64) + unsqueeze_31 = paddle._C_ops.unsqueeze(transpose_130, full_int_array_0) + + # pd_op.add: (16x24x144x144xf32) <- (16x24x144x144xf32, 1x24x144x144xf32) + add_148 = paddle._C_ops.add(matmul_108, unsqueeze_31) + + # pd_op.reshape: (4x4x24x144x144xf32) <- (16x24x144x144xf32, 5xi64) + reshape_187 = paddle._C_ops.reshape(add_148, full_int_array_65) + del full_int_array_65 + + # pd_op.unsqueeze: (4x1x144x144xf32) <- (4x144x144xf32, 1xi64) + unsqueeze_69 = paddle._C_ops.unsqueeze(where_21, full_int_array_1) + del where_21 + + # pd_op.unsqueeze: (1x4x1x144x144xf32) <- (4x1x144x144xf32, 1xi64) + unsqueeze_32 = paddle._C_ops.unsqueeze(unsqueeze_69, full_int_array_0) + del unsqueeze_69 + + # pd_op.add: (4x4x24x144x144xf32) <- (4x4x24x144x144xf32, 1x4x1x144x144xf32) + add_149 = paddle._C_ops.add(reshape_187, unsqueeze_32) + + # pd_op.reshape: (16x24x144x144xf32) <- (4x4x24x144x144xf32, 4xi64) + reshape_343 = paddle._C_ops.reshape(add_149, full_int_array_66) + del full_int_array_66 + + # pd_op.softmax: (16x24x144x144xf32) <- (16x24x144x144xf32) + softmax_21 = paddle._C_ops.softmax(reshape_343, -1) + del reshape_343 + + # pd_op.matmul: (16x24x144x32xf32) <- (16x24x144x144xf32, 16x24x144x32xf32) + matmul_145 = paddle._C_ops.matmul(softmax_21, slice_21, False, False) + + # pd_op.transpose: (16x144x24x32xf32) <- (16x24x144x32xf32) + transpose_131 = paddle._C_ops.transpose(matmul_145, [0, 2, 1, 3]) + del matmul_145 + + # pd_op.reshape: (16x144x768xf32) <- (16x144x24x32xf32, 3xi64) + reshape_188 = paddle._C_ops.reshape(transpose_131, full_int_array_60) + del full_int_array_60 + + # pd_op.matmul: (16x144x768xf32) <- (16x144x768xf32, 768x768xf32) + matmul_109 = paddle._C_ops.matmul(reshape_188, parameter_38, False, False) + del parameter_38 + + # pd_op.add: (16x144x768xf32) <- (16x144x768xf32, 768xf32) + add_150 = paddle._C_ops.add(matmul_109, parameter_37) + del parameter_37 + + # pd_op.reshape: (16x12x12x768xf32) <- (16x144x768xf32, 4xi64) + reshape_189 = paddle._C_ops.reshape(add_150, full_int_array_57) + del full_int_array_57 + + # pd_op.reshape: (4x2x2x12x12x768xf32) <- (16x12x12x768xf32, 6xi64) + reshape_344 = paddle._C_ops.reshape(reshape_189, full_int_array_61) + del full_int_array_61 + + # pd_op.transpose: (4x2x12x2x12x768xf32) <- (4x2x2x12x12x768xf32) + transpose_132 = paddle._C_ops.transpose(reshape_344, [0, 1, 3, 2, 4, 5]) + del reshape_344 + + # pd_op.reshape: (4x24x24x768xf32) <- (4x2x12x2x12x768xf32, 4xi64) + reshape_190 = paddle._C_ops.reshape(transpose_132, full_int_array_62) + del full_int_array_62 + + # pd_op.roll: (4x24x24x768xf32) <- (4x24x24x768xf32, 2xi64) + roll_21 = paddle._C_ops.roll(reshape_190, full_int_array_5, [1, 2]) + + # pd_op.reshape: (4x576x768xf32) <- (4x24x24x768xf32, 3xi64) + reshape_191 = paddle._C_ops.reshape(roll_21, full_int_array_63) + del full_int_array_63 + + # pd_op.full: (xf32) <- () + full_22 = paddle._C_ops.full( + [], + float("0.908696"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_233 = full_22 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_40 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_221 = paddle._C_ops.add(full_22, uniform_40) + del uniform_40 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_40 = paddle._C_ops.floor(add_221) + del add_221 + + # pd_op.divide: (4x576x768xf32) <- (4x576x768xf32, xf32) + divide_40 = paddle._C_ops.divide(reshape_191, full_22) + + # pd_op.multiply: (4x576x768xf32) <- (4x576x768xf32, 4x1x1xf32) + multiply_40 = paddle._C_ops.multiply(divide_40, floor_40) + + # pd_op.add: (4x576x768xf32) <- (4x576x768xf32, 4x576x768xf32) + add_151 = paddle._C_ops.add(add_146, multiply_40) + + # pd_op.layer_norm: (4x576x768xf32, 4x576xf32, 4x576xf32) <- (4x576x768xf32, 768xf32, 768xf32) + layer_norm_138, layer_norm_139, layer_norm_140 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_151, parameter_36, parameter_35, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_35, parameter_36 + + # pd_op.matmul: (4x576x3072xf32) <- (4x576x768xf32, 768x3072xf32) + matmul_110 = paddle._C_ops.matmul(layer_norm_138, parameter_34, False, False) + del parameter_34 + + # pd_op.add: (4x576x3072xf32) <- (4x576x3072xf32, 3072xf32) + add_152 = paddle._C_ops.add(matmul_110, parameter_33) + del parameter_33 + + # pd_op.gelu: (4x576x3072xf32) <- (4x576x3072xf32) + gelu_21 = paddle._C_ops.gelu(add_152, False) + + # pd_op.matmul: (4x576x768xf32) <- (4x576x3072xf32, 3072x768xf32) + matmul_111 = paddle._C_ops.matmul(gelu_21, parameter_32, False, False) + del parameter_32 + + # pd_op.add: (4x576x768xf32) <- (4x576x768xf32, 768xf32) + add_153 = paddle._C_ops.add(matmul_111, parameter_31) + del parameter_31 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_41 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_222 = paddle._C_ops.add(full_22, uniform_41) + del uniform_41 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_41 = paddle._C_ops.floor(add_222) + del add_222 + + # pd_op.divide: (4x576x768xf32) <- (4x576x768xf32, xf32) + divide_41 = paddle._C_ops.divide(add_153, full_22) + + # pd_op.multiply: (4x576x768xf32) <- (4x576x768xf32, 4x1x1xf32) + multiply_41 = paddle._C_ops.multiply(divide_41, floor_41) + + # pd_op.add: (4x576x768xf32) <- (4x576x768xf32, 4x576x768xf32) + add_154 = paddle._C_ops.add(add_151, multiply_41) + + # pd_op.reshape: (4x24x24x768xf32) <- (4x576x768xf32, 4xi64) + reshape_192 = paddle._C_ops.reshape(add_154, full_int_array_55) + + # pd_op.strided_slice: (4x12x12x768xf32) <- (4x24x24x768xf32, 2xi64, 2xi64, 2xi64) + strided_slice_8 = paddle._C_ops.strided_slice( + reshape_192, [1, 2], full_int_array_21, full_int_array_34, full_int_array_6 + ) + + # pd_op.strided_slice: (4x12x12x768xf32) <- (4x24x24x768xf32, 2xi64, 2xi64, 2xi64) + strided_slice_9 = paddle._C_ops.strided_slice( + reshape_192, [1, 2], full_int_array_7, full_int_array_34, full_int_array_6 + ) + + # pd_op.strided_slice: (4x12x12x768xf32) <- (4x24x24x768xf32, 2xi64, 2xi64, 2xi64) + strided_slice_10 = paddle._C_ops.strided_slice( + reshape_192, [1, 2], full_int_array_8, full_int_array_34, full_int_array_6 + ) + + # pd_op.strided_slice: (4x12x12x768xf32) <- (4x24x24x768xf32, 2xi64, 2xi64, 2xi64) + strided_slice_11 = paddle._C_ops.strided_slice( + reshape_192, [1, 2], full_int_array_23, full_int_array_34, full_int_array_6 + ) + + # pd_op.reshape: (4x24x24x768xf32) <- (4x24x24x768xf32, 4xi64) + reshape_345 = paddle._C_ops.reshape(reshape_192, full_int_array_55) + del full_int_array_55 + + # builtin.combine: ([4x12x12x768xf32, 4x12x12x768xf32, 4x12x12x768xf32, 4x12x12x768xf32]) <- (4x12x12x768xf32, 4x12x12x768xf32, 4x12x12x768xf32, 4x12x12x768xf32) + combine_2 = [ + strided_slice_8, + strided_slice_9, + strided_slice_10, + strided_slice_11, + ] + + # pd_op.concat: (4x12x12x3072xf32) <- ([4x12x12x768xf32, 4x12x12x768xf32, 4x12x12x768xf32, 4x12x12x768xf32], 1xi32) + concat_2 = paddle._C_ops.concat(combine_2, full_2) + del combine_2 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_67 = [4, -1, 3072] + + # pd_op.reshape: (4x144x3072xf32) <- (4x12x12x3072xf32, 3xi64) + reshape_193 = paddle._C_ops.reshape(concat_2, full_int_array_67) + del full_int_array_67 + + # pd_op.layer_norm: (4x144x3072xf32, 4x144xf32, 4x144xf32) <- (4x144x3072xf32, 3072xf32, 3072xf32) + layer_norm_141, layer_norm_142, layer_norm_143 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + reshape_193, parameter_30, parameter_29, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_29, parameter_30 + + # pd_op.matmul: (4x144x1536xf32) <- (4x144x3072xf32, 3072x1536xf32) + matmul_112 = paddle._C_ops.matmul(layer_norm_141, parameter_28, False, False) + del parameter_28 + + # pd_op.layer_norm: (4x144x1536xf32, 4x144xf32, 4x144xf32) <- (4x144x1536xf32, 1536xf32, 1536xf32) + layer_norm_144, layer_norm_145, layer_norm_146 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + matmul_112, parameter_27, parameter_26, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_26, parameter_27 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_68 = [4, 12, 12, 1536] + + # pd_op.reshape: (4x12x12x1536xf32) <- (4x144x1536xf32, 4xi64) + reshape_194 = paddle._C_ops.reshape(layer_norm_144, full_int_array_68) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_69 = [4, 1, 12, 1, 12, 1536] + + # pd_op.reshape: (4x1x12x1x12x1536xf32) <- (4x12x12x1536xf32, 6xi64) + reshape_346 = paddle._C_ops.reshape(reshape_194, full_int_array_69) + + # pd_op.transpose: (4x1x1x12x12x1536xf32) <- (4x1x12x1x12x1536xf32) + transpose_133 = paddle._C_ops.transpose(reshape_346, [0, 1, 3, 2, 4, 5]) + del reshape_346 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_70 = [-1, 12, 12, 1536] + + # pd_op.reshape: (4x12x12x1536xf32) <- (4x1x1x12x12x1536xf32, 4xi64) + reshape_195 = paddle._C_ops.reshape(transpose_133, full_int_array_70) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_71 = [-1, 144, 1536] + + # pd_op.reshape: (4x144x1536xf32) <- (4x12x12x1536xf32, 3xi64) + reshape_196 = paddle._C_ops.reshape(reshape_195, full_int_array_71) + + # pd_op.matmul: (4x144x4608xf32) <- (4x144x1536xf32, 1536x4608xf32) + matmul_113 = paddle._C_ops.matmul(reshape_196, parameter_25, False, False) + del parameter_25 + + # pd_op.add: (4x144x4608xf32) <- (4x144x4608xf32, 4608xf32) + add_155 = paddle._C_ops.add(matmul_113, parameter_24) + del parameter_24 + + # pd_op.full_int_array: (5xi64) <- () + full_int_array_72 = [4, 144, 3, 48, 32] + + # pd_op.reshape: (4x144x3x48x32xf32) <- (4x144x4608xf32, 5xi64) + reshape_347 = paddle._C_ops.reshape(add_155, full_int_array_72) + + # pd_op.transpose: (3x4x48x144x32xf32) <- (4x144x3x48x32xf32) + transpose_134 = paddle._C_ops.transpose(reshape_347, [2, 0, 3, 1, 4]) + del reshape_347 + + # pd_op.slice: (4x48x144x32xf32) <- (3x4x48x144x32xf32, 1xi64, 1xi64) + slice_68 = paddle._C_ops.slice( + transpose_134, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (4x48x144x32xf32) <- (3x4x48x144x32xf32, 1xi64, 1xi64) + slice_69 = paddle._C_ops.slice( + transpose_134, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (4x48x144x32xf32) <- (3x4x48x144x32xf32, 1xi64, 1xi64) + slice_22 = paddle._C_ops.slice( + transpose_134, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (4x48x144x32xf32) <- (4x48x144x32xf32, 1xf32) + scale_22 = paddle._C_ops.scale(slice_68, full_0, float("0"), True) + del slice_68 + + # pd_op.transpose: (4x48x32x144xf32) <- (4x48x144x32xf32) + transpose_135 = paddle._C_ops.transpose(slice_69, [0, 1, 3, 2]) + del slice_69 + + # pd_op.matmul: (4x48x144x144xf32) <- (4x48x144x32xf32, 4x48x32x144xf32) + matmul_114 = paddle._C_ops.matmul(scale_22, transpose_135, False, False) + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_197 = paddle._C_ops.reshape(data_18, full_int_array_15) + del data_18 + + # pd_op.index_select: (20736x48xf32) <- (529x48xf32, 20736xi64) + index_select_22 = paddle._C_ops.index_select(data_19, reshape_197, 0) + del data_19 + + # pd_op.reshape: (144x144x48xf32) <- (20736x48xf32, 3xi64) + reshape_348 = paddle._C_ops.reshape(index_select_22, full_int_array_16) + + # pd_op.transpose: (48x144x144xf32) <- (144x144x48xf32) + transpose_136 = paddle._C_ops.transpose(reshape_348, [2, 0, 1]) + del reshape_348 + + # pd_op.unsqueeze: (1x48x144x144xf32) <- (48x144x144xf32, 1xi64) + unsqueeze_33 = paddle._C_ops.unsqueeze(transpose_136, full_int_array_0) + + # pd_op.add: (4x48x144x144xf32) <- (4x48x144x144xf32, 1x48x144x144xf32) + add_223 = paddle._C_ops.add(matmul_114, unsqueeze_33) + + # pd_op.softmax: (4x48x144x144xf32) <- (4x48x144x144xf32) + softmax_22 = paddle._C_ops.softmax(add_223, -1) + del add_223 + + # pd_op.matmul: (4x48x144x32xf32) <- (4x48x144x144xf32, 4x48x144x32xf32) + matmul_146 = paddle._C_ops.matmul(softmax_22, slice_22, False, False) + + # pd_op.transpose: (4x144x48x32xf32) <- (4x48x144x32xf32) + transpose_137 = paddle._C_ops.transpose(matmul_146, [0, 2, 1, 3]) + del matmul_146 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_73 = [4, 144, 1536] + + # pd_op.reshape: (4x144x1536xf32) <- (4x144x48x32xf32, 3xi64) + reshape_198 = paddle._C_ops.reshape(transpose_137, full_int_array_73) + + # pd_op.matmul: (4x144x1536xf32) <- (4x144x1536xf32, 1536x1536xf32) + matmul_115 = paddle._C_ops.matmul(reshape_198, parameter_23, False, False) + del parameter_23 + + # pd_op.add: (4x144x1536xf32) <- (4x144x1536xf32, 1536xf32) + add_156 = paddle._C_ops.add(matmul_115, parameter_22) + del parameter_22 + + # pd_op.reshape: (4x12x12x1536xf32) <- (4x144x1536xf32, 4xi64) + reshape_199 = paddle._C_ops.reshape(add_156, full_int_array_70) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_74 = [-1, 1, 1, 12, 12, 1536] + + # pd_op.reshape: (4x1x1x12x12x1536xf32) <- (4x12x12x1536xf32, 6xi64) + reshape_349 = paddle._C_ops.reshape(reshape_199, full_int_array_74) + + # pd_op.transpose: (4x1x12x1x12x1536xf32) <- (4x1x1x12x12x1536xf32) + transpose_138 = paddle._C_ops.transpose(reshape_349, [0, 1, 3, 2, 4, 5]) + del reshape_349 + + # pd_op.reshape: (4x12x12x1536xf32) <- (4x1x12x1x12x1536xf32, 4xi64) + reshape_200 = paddle._C_ops.reshape(transpose_138, full_int_array_70) + + # pd_op.reshape: (4x144x1536xf32) <- (4x12x12x1536xf32, 3xi64) + reshape_201 = paddle._C_ops.reshape(reshape_200, full_int_array_73) + + # pd_op.full: (xf32) <- () + full_23 = paddle._C_ops.full( + [], + float("0.904348"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_255 = full_23 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_42 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_224 = paddle._C_ops.add(full_23, uniform_42) + del uniform_42 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_42 = paddle._C_ops.floor(add_224) + del add_224 + + # pd_op.divide: (4x144x1536xf32) <- (4x144x1536xf32, xf32) + divide_42 = paddle._C_ops.divide(reshape_201, full_23) + + # pd_op.multiply: (4x144x1536xf32) <- (4x144x1536xf32, 4x1x1xf32) + multiply_42 = paddle._C_ops.multiply(divide_42, floor_42) + + # pd_op.add: (4x144x1536xf32) <- (4x144x1536xf32, 4x144x1536xf32) + add_157 = paddle._C_ops.add(matmul_112, multiply_42) + + # pd_op.layer_norm: (4x144x1536xf32, 4x144xf32, 4x144xf32) <- (4x144x1536xf32, 1536xf32, 1536xf32) + layer_norm_147, layer_norm_148, layer_norm_149 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_157, parameter_21, parameter_20, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_20, parameter_21 + + # pd_op.matmul: (4x144x6144xf32) <- (4x144x1536xf32, 1536x6144xf32) + matmul_116 = paddle._C_ops.matmul(layer_norm_147, parameter_19, False, False) + del parameter_19 + + # pd_op.add: (4x144x6144xf32) <- (4x144x6144xf32, 6144xf32) + add_158 = paddle._C_ops.add(matmul_116, parameter_18) + del parameter_18 + + # pd_op.gelu: (4x144x6144xf32) <- (4x144x6144xf32) + gelu_22 = paddle._C_ops.gelu(add_158, False) + + # pd_op.matmul: (4x144x1536xf32) <- (4x144x6144xf32, 6144x1536xf32) + matmul_117 = paddle._C_ops.matmul(gelu_22, parameter_17, False, False) + del parameter_17 + + # pd_op.add: (4x144x1536xf32) <- (4x144x1536xf32, 1536xf32) + add_159 = paddle._C_ops.add(matmul_117, parameter_16) + del parameter_16 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_43 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_225 = paddle._C_ops.add(full_23, uniform_43) + del uniform_43 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_43 = paddle._C_ops.floor(add_225) + del add_225 + + # pd_op.divide: (4x144x1536xf32) <- (4x144x1536xf32, xf32) + divide_43 = paddle._C_ops.divide(add_159, full_23) + + # pd_op.multiply: (4x144x1536xf32) <- (4x144x1536xf32, 4x1x1xf32) + multiply_43 = paddle._C_ops.multiply(divide_43, floor_43) + + # pd_op.add: (4x144x1536xf32) <- (4x144x1536xf32, 4x144x1536xf32) + add_160 = paddle._C_ops.add(add_157, multiply_43) + + # pd_op.layer_norm: (4x144x1536xf32, 4x144xf32, 4x144xf32) <- (4x144x1536xf32, 1536xf32, 1536xf32) + layer_norm_150, layer_norm_151, layer_norm_152 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_160, parameter_15, parameter_14, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_14, parameter_15 + + # pd_op.reshape: (4x12x12x1536xf32) <- (4x144x1536xf32, 4xi64) + reshape_202 = paddle._C_ops.reshape(layer_norm_150, full_int_array_68) + del full_int_array_68 + + # pd_op.roll: (4x12x12x1536xf32) <- (4x12x12x1536xf32, 2xi64) + roll_22 = paddle._C_ops.roll(reshape_202, full_int_array_4, [1, 2]) + + # pd_op.reshape: (4x1x12x1x12x1536xf32) <- (4x12x12x1536xf32, 6xi64) + reshape_350 = paddle._C_ops.reshape(roll_22, full_int_array_69) + del full_int_array_69 + + # pd_op.transpose: (4x1x1x12x12x1536xf32) <- (4x1x12x1x12x1536xf32) + transpose_139 = paddle._C_ops.transpose(reshape_350, [0, 1, 3, 2, 4, 5]) + del reshape_350 + + # pd_op.reshape: (4x12x12x1536xf32) <- (4x1x1x12x12x1536xf32, 4xi64) + reshape_203 = paddle._C_ops.reshape(transpose_139, full_int_array_70) + + # pd_op.reshape: (4x144x1536xf32) <- (4x12x12x1536xf32, 3xi64) + reshape_204 = paddle._C_ops.reshape(reshape_203, full_int_array_71) + del full_int_array_71 + + # pd_op.full: (1x12x12x1xf32) <- () + full_45 = paddle._C_ops.full( + [1, 12, 12, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x12x12x1xf32) <- (1x12x12x1xf32, 2xi64, 2xi64, 2xi64) + set_value__100 = paddle._C_ops.set_value_( + full_45, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_45, full_int_array_21 + + # pd_op.set_value_: (1x12x12x1xf32) <- (1x12x12x1xf32, 2xi64, 2xi64, 2xi64) + set_value__101 = paddle._C_ops.set_value_( + set_value__100, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del full_int_array_24, set_value__100 + + # pd_op.set_value_: (1x12x12x1xf32) <- (1x12x12x1xf32, 2xi64, 2xi64, 2xi64) + set_value__102 = paddle._C_ops.set_value_( + set_value__101, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del full_int_array_26, full_int_array_27, set_value__101 + + # pd_op.set_value_: (1x12x12x1xf32) <- (1x12x12x1xf32, 2xi64, 2xi64, 2xi64) + set_value__103 = paddle._C_ops.set_value_( + set_value__102, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del full_int_array_28, set_value__102 + + # pd_op.set_value_: (1x12x12x1xf32) <- (1x12x12x1xf32, 2xi64, 2xi64, 2xi64) + set_value__104 = paddle._C_ops.set_value_( + set_value__103, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del full_int_array_22, set_value__103 + + # pd_op.set_value_: (1x12x12x1xf32) <- (1x12x12x1xf32, 2xi64, 2xi64, 2xi64) + set_value__105 = paddle._C_ops.set_value_( + set_value__104, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del full_int_array_25, full_int_array_30, set_value__104 + + # pd_op.set_value_: (1x12x12x1xf32) <- (1x12x12x1xf32, 2xi64, 2xi64, 2xi64) + set_value__106 = paddle._C_ops.set_value_( + set_value__105, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del full_int_array_31, full_int_array_32, set_value__105 + + # pd_op.set_value_: (1x12x12x1xf32) <- (1x12x12x1xf32, 2xi64, 2xi64, 2xi64) + set_value__107 = paddle._C_ops.set_value_( + set_value__106, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del full_int_array_29, full_int_array_33, set_value__106 + + # pd_op.set_value_: (1x12x12x1xf32) <- (1x12x12x1xf32, 2xi64, 2xi64, 2xi64) + set_value__11 = paddle._C_ops.set_value_( + set_value__107, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del full_int_array_34, set_value__107 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_75 = [1, 1, 12, 1, 12, 1] + + # pd_op.reshape: (1x1x12x1x12x1xf32) <- (1x12x12x1xf32, 6xi64) + reshape_351 = paddle._C_ops.reshape(set_value__11, full_int_array_75) + del full_int_array_75 + + # pd_op.transpose: (1x1x1x12x12x1xf32) <- (1x1x12x1x12x1xf32) + transpose_157 = paddle._C_ops.transpose(reshape_351, [0, 1, 3, 2, 4, 5]) + del reshape_351 + + # pd_op.reshape: (1x12x12x1xf32) <- (1x1x1x12x12x1xf32, 4xi64) + reshape_352 = paddle._C_ops.reshape(transpose_157, full_int_array_36) + del full_int_array_36, transpose_157 + + # pd_op.reshape: (1x144xf32) <- (1x12x12x1xf32, 2xi64) + reshape_353 = paddle._C_ops.reshape(reshape_352, full_int_array_37) + del full_int_array_37, reshape_352 + + # pd_op.unsqueeze: (1x1x144xf32) <- (1x144xf32, 1xi64) + unsqueeze_70 = paddle._C_ops.unsqueeze(reshape_353, full_int_array_1) + + # pd_op.unsqueeze: (1x144x1xf32) <- (1x144xf32, 1xi64) + unsqueeze_71 = paddle._C_ops.unsqueeze(reshape_353, full_int_array_2) + del reshape_353 + + # pd_op.subtract: (1x144x144xf32) <- (1x1x144xf32, 1x144x1xf32) + subtract_11 = paddle._C_ops.subtract(unsqueeze_70, unsqueeze_71) + del unsqueeze_70, unsqueeze_71 + + # pd_op.not_equal: (1x144x144xb) <- (1x144x144xf32, xf32) + not_equal_11 = paddle._C_ops.not_equal(subtract_11, full_26) + + # pd_op.full: (1x144x144xf32) <- () + full_46 = paddle._C_ops.full( + [1, 144, 144], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (1x144x144xf32) <- (1x144x144xb, 1x144x144xf32, 1x144x144xf32) + where_22 = paddle._C_ops.where(not_equal_11, full_46, subtract_11) + del full_46, not_equal_11, subtract_11 + + # pd_op.equal: (1x144x144xb) <- (1x144x144xf32, xf32) + equal_11 = paddle._C_ops.equal(where_22, full_26) + del full_26 + + # pd_op.full: (1x144x144xf32) <- () + full_47 = paddle._C_ops.full( + [1, 144, 144], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (1x144x144xf32) <- (1x144x144xb, 1x144x144xf32, 1x144x144xf32) + where_23 = paddle._C_ops.where(equal_11, full_47, where_22) + del equal_11, full_47, where_22 + + # pd_op.matmul: (4x144x4608xf32) <- (4x144x1536xf32, 1536x4608xf32) + matmul_118 = paddle._C_ops.matmul(reshape_204, parameter_13, False, False) + del parameter_13 + + # pd_op.add: (4x144x4608xf32) <- (4x144x4608xf32, 4608xf32) + add_161 = paddle._C_ops.add(matmul_118, parameter_12) + del parameter_12 + + # pd_op.reshape: (4x144x3x48x32xf32) <- (4x144x4608xf32, 5xi64) + reshape_354 = paddle._C_ops.reshape(add_161, full_int_array_72) + del full_int_array_72 + + # pd_op.transpose: (3x4x48x144x32xf32) <- (4x144x3x48x32xf32) + transpose_140 = paddle._C_ops.transpose(reshape_354, [2, 0, 3, 1, 4]) + del reshape_354 + + # pd_op.slice: (4x48x144x32xf32) <- (3x4x48x144x32xf32, 1xi64, 1xi64) + slice_70 = paddle._C_ops.slice( + transpose_140, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (4x48x144x32xf32) <- (3x4x48x144x32xf32, 1xi64, 1xi64) + slice_71 = paddle._C_ops.slice( + transpose_140, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (4x48x144x32xf32) <- (3x4x48x144x32xf32, 1xi64, 1xi64) + slice_23 = paddle._C_ops.slice( + transpose_140, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (4x48x144x32xf32) <- (4x48x144x32xf32, 1xf32) + scale_23 = paddle._C_ops.scale(slice_70, full_0, float("0"), True) + del slice_70 + + # pd_op.transpose: (4x48x32x144xf32) <- (4x48x144x32xf32) + transpose_141 = paddle._C_ops.transpose(slice_71, [0, 1, 3, 2]) + del slice_71 + + # pd_op.matmul: (4x48x144x144xf32) <- (4x48x144x32xf32, 4x48x32x144xf32) + matmul_119 = paddle._C_ops.matmul(scale_23, transpose_141, False, False) + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_205 = paddle._C_ops.reshape(data_21, full_int_array_15) + del data_21, full_int_array_15 + + # pd_op.index_select: (20736x48xf32) <- (529x48xf32, 20736xi64) + index_select_23 = paddle._C_ops.index_select(data_22, reshape_205, 0) + del data_22 + + # pd_op.reshape: (144x144x48xf32) <- (20736x48xf32, 3xi64) + reshape_355 = paddle._C_ops.reshape(index_select_23, full_int_array_16) + del full_int_array_16 + + # pd_op.transpose: (48x144x144xf32) <- (144x144x48xf32) + transpose_142 = paddle._C_ops.transpose(reshape_355, [2, 0, 1]) + del reshape_355 + + # pd_op.unsqueeze: (1x48x144x144xf32) <- (48x144x144xf32, 1xi64) + unsqueeze_34 = paddle._C_ops.unsqueeze(transpose_142, full_int_array_0) + + # pd_op.add: (4x48x144x144xf32) <- (4x48x144x144xf32, 1x48x144x144xf32) + add_162 = paddle._C_ops.add(matmul_119, unsqueeze_34) + + # pd_op.full_int_array: (5xi64) <- () + full_int_array_76 = [4, 1, 48, 144, 144] + + # pd_op.reshape: (4x1x48x144x144xf32) <- (4x48x144x144xf32, 5xi64) + reshape_206 = paddle._C_ops.reshape(add_162, full_int_array_76) + del full_int_array_76 + + # pd_op.unsqueeze: (1x1x144x144xf32) <- (1x144x144xf32, 1xi64) + unsqueeze_72 = paddle._C_ops.unsqueeze(where_23, full_int_array_1) + del where_23 + + # pd_op.unsqueeze: (1x1x1x144x144xf32) <- (1x1x144x144xf32, 1xi64) + unsqueeze_35 = paddle._C_ops.unsqueeze(unsqueeze_72, full_int_array_0) + del unsqueeze_72 + + # pd_op.add: (4x1x48x144x144xf32) <- (4x1x48x144x144xf32, 1x1x1x144x144xf32) + add_163 = paddle._C_ops.add(reshape_206, unsqueeze_35) + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_77 = [4, 48, 144, 144] + + # pd_op.reshape: (4x48x144x144xf32) <- (4x1x48x144x144xf32, 4xi64) + reshape_356 = paddle._C_ops.reshape(add_163, full_int_array_77) + del full_int_array_77 + + # pd_op.softmax: (4x48x144x144xf32) <- (4x48x144x144xf32) + softmax_23 = paddle._C_ops.softmax(reshape_356, -1) + del reshape_356 + + # pd_op.matmul: (4x48x144x32xf32) <- (4x48x144x144xf32, 4x48x144x32xf32) + matmul_147 = paddle._C_ops.matmul(softmax_23, slice_23, False, False) + + # pd_op.transpose: (4x144x48x32xf32) <- (4x48x144x32xf32) + transpose_143 = paddle._C_ops.transpose(matmul_147, [0, 2, 1, 3]) + del matmul_147 + + # pd_op.reshape: (4x144x1536xf32) <- (4x144x48x32xf32, 3xi64) + reshape_207 = paddle._C_ops.reshape(transpose_143, full_int_array_73) + + # pd_op.matmul: (4x144x1536xf32) <- (4x144x1536xf32, 1536x1536xf32) + matmul_120 = paddle._C_ops.matmul(reshape_207, parameter_11, False, False) + del parameter_11 + + # pd_op.add: (4x144x1536xf32) <- (4x144x1536xf32, 1536xf32) + add_164 = paddle._C_ops.add(matmul_120, parameter_10) + del parameter_10 + + # pd_op.reshape: (4x12x12x1536xf32) <- (4x144x1536xf32, 4xi64) + reshape_208 = paddle._C_ops.reshape(add_164, full_int_array_70) + + # pd_op.reshape: (4x1x1x12x12x1536xf32) <- (4x12x12x1536xf32, 6xi64) + reshape_357 = paddle._C_ops.reshape(reshape_208, full_int_array_74) + del full_int_array_74 + + # pd_op.transpose: (4x1x12x1x12x1536xf32) <- (4x1x1x12x12x1536xf32) + transpose_144 = paddle._C_ops.transpose(reshape_357, [0, 1, 3, 2, 4, 5]) + del reshape_357 + + # pd_op.reshape: (4x12x12x1536xf32) <- (4x1x12x1x12x1536xf32, 4xi64) + reshape_209 = paddle._C_ops.reshape(transpose_144, full_int_array_70) + del full_int_array_70 + + # pd_op.roll: (4x12x12x1536xf32) <- (4x12x12x1536xf32, 2xi64) + roll_23 = paddle._C_ops.roll(reshape_209, full_int_array_5, [1, 2]) + + # pd_op.reshape: (4x144x1536xf32) <- (4x12x12x1536xf32, 3xi64) + reshape_210 = paddle._C_ops.reshape(roll_23, full_int_array_73) + del full_int_array_73 + + # pd_op.full: (xf32) <- () + full_24 = paddle._C_ops.full( + [], float("0.9"), paddle.float32, paddle.framework._current_expected_place() + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_266 = full_24 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_44 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_226 = paddle._C_ops.add(full_24, uniform_44) + del uniform_44 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_44 = paddle._C_ops.floor(add_226) + del add_226 + + # pd_op.divide: (4x144x1536xf32) <- (4x144x1536xf32, xf32) + divide_44 = paddle._C_ops.divide(reshape_210, full_24) + + # pd_op.multiply: (4x144x1536xf32) <- (4x144x1536xf32, 4x1x1xf32) + multiply_44 = paddle._C_ops.multiply(divide_44, floor_44) + + # pd_op.add: (4x144x1536xf32) <- (4x144x1536xf32, 4x144x1536xf32) + add_165 = paddle._C_ops.add(add_160, multiply_44) + + # pd_op.layer_norm: (4x144x1536xf32, 4x144xf32, 4x144xf32) <- (4x144x1536xf32, 1536xf32, 1536xf32) + layer_norm_153, layer_norm_154, layer_norm_155 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_165, parameter_9, parameter_8, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_8, parameter_9 + + # pd_op.matmul: (4x144x6144xf32) <- (4x144x1536xf32, 1536x6144xf32) + matmul_121 = paddle._C_ops.matmul(layer_norm_153, parameter_7, False, False) + del parameter_7 + + # pd_op.add: (4x144x6144xf32) <- (4x144x6144xf32, 6144xf32) + add_166 = paddle._C_ops.add(matmul_121, parameter_6) + del parameter_6 + + # pd_op.gelu: (4x144x6144xf32) <- (4x144x6144xf32) + gelu_23 = paddle._C_ops.gelu(add_166, False) + + # pd_op.matmul: (4x144x1536xf32) <- (4x144x6144xf32, 6144x1536xf32) + matmul_122 = paddle._C_ops.matmul(gelu_23, parameter_5, False, False) + del parameter_5 + + # pd_op.add: (4x144x1536xf32) <- (4x144x1536xf32, 1536xf32) + add_167 = paddle._C_ops.add(matmul_122, parameter_4) + del parameter_4 + + # pd_op.uniform: (4x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_45 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + del full_29, full_30, full_int_array_40 + + # pd_op.add: (4x1x1xf32) <- (xf32, 4x1x1xf32) + add_227 = paddle._C_ops.add(full_24, uniform_45) + del uniform_45 + + # pd_op.floor: (4x1x1xf32) <- (4x1x1xf32) + floor_45 = paddle._C_ops.floor(add_227) + del add_227 + + # pd_op.divide: (4x144x1536xf32) <- (4x144x1536xf32, xf32) + divide_45 = paddle._C_ops.divide(add_167, full_24) + + # pd_op.multiply: (4x144x1536xf32) <- (4x144x1536xf32, 4x1x1xf32) + multiply_45 = paddle._C_ops.multiply(divide_45, floor_45) + + # pd_op.add: (4x144x1536xf32) <- (4x144x1536xf32, 4x144x1536xf32) + add_168 = paddle._C_ops.add(add_165, multiply_45) + + # pd_op.layer_norm: (4x144x1536xf32, 4x144xf32, 4x144xf32) <- (4x144x1536xf32, 1536xf32, 1536xf32) + layer_norm_158, layer_norm_156, layer_norm_157 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_168, parameter_3, parameter_2, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_2, parameter_3 + + # pd_op.transpose: (4x1536x144xf32) <- (4x144x1536xf32) + transpose_145 = paddle._C_ops.transpose(layer_norm_158, [0, 2, 1]) + del layer_norm_158 + + # pd_op.unsqueeze: (4x1536x1x144xf32) <- (4x1536x144xf32, 1xi64) + unsqueeze_36 = paddle._C_ops.unsqueeze(transpose_145, full_int_array_2) + + # pd_op.pool2d: (4x1536x1x1xf32) <- (4x1536x1x144xf32, 2xi64) + pool2d_0 = paddle._C_ops.pool2d( + unsqueeze_36, + full_int_array_23, + [1, 1], + [0, 0], + False, + True, + "NCHW", + "avg", + False, + True, + "EXPLICIT", + ) + del full_int_array_23 + + # pd_op.squeeze: (4x1536x1xf32) <- (4x1536x1x1xf32, 1xi64) + squeeze_0 = paddle._C_ops.squeeze(pool2d_0, full_int_array_2) + + # pd_op.flatten: (4x1536xf32) <- (4x1536x1xf32) + flatten_0 = paddle._C_ops.flatten(squeeze_0, 1, 2) + + # pd_op.matmul: (4x102xf32) <- (4x1536xf32, 1536x102xf32) + matmul_123 = paddle._C_ops.matmul(flatten_0, parameter_1, False, False) + del parameter_1 + + # pd_op.add: (4x102xf32) <- (4x102xf32, 102xf32) + add_169 = paddle._C_ops.add(matmul_123, parameter_0) + del ( + assign_0, + assign_1, + assign_10, + assign_101, + assign_103, + assign_104, + assign_105, + assign_106, + assign_107, + assign_108, + assign_109, + assign_111, + assign_112, + assign_114, + assign_115, + assign_116, + assign_117, + assign_118, + assign_119, + assign_12, + assign_121, + assign_123, + assign_124, + assign_125, + assign_126, + assign_127, + assign_128, + assign_129, + assign_13, + assign_131, + assign_132, + assign_134, + assign_135, + assign_136, + assign_137, + assign_138, + assign_139, + assign_14, + assign_141, + assign_143, + assign_144, + assign_145, + assign_146, + assign_147, + assign_148, + assign_149, + assign_15, + assign_151, + assign_152, + assign_154, + assign_155, + assign_156, + assign_157, + assign_158, + assign_159, + assign_16, + assign_161, + assign_163, + assign_164, + assign_165, + assign_166, + assign_167, + assign_168, + assign_169, + assign_17, + assign_171, + assign_172, + assign_174, + assign_175, + assign_176, + assign_177, + assign_178, + assign_179, + assign_18, + assign_181, + assign_183, + assign_184, + assign_185, + assign_186, + assign_187, + assign_188, + assign_189, + assign_19, + assign_191, + assign_192, + assign_194, + assign_195, + assign_196, + assign_197, + assign_198, + assign_199, + assign_2, + assign_20, + assign_201, + assign_203, + assign_204, + assign_205, + assign_206, + assign_207, + assign_208, + assign_209, + assign_21, + assign_211, + assign_212, + assign_214, + assign_215, + assign_216, + assign_217, + assign_218, + assign_219, + assign_22, + assign_221, + assign_223, + assign_224, + assign_225, + assign_226, + assign_227, + assign_228, + assign_229, + assign_23, + assign_231, + assign_232, + assign_234, + assign_235, + assign_236, + assign_237, + assign_238, + assign_239, + assign_24, + assign_240, + assign_241, + assign_242, + assign_243, + assign_244, + assign_245, + assign_247, + assign_248, + assign_249, + assign_25, + assign_250, + assign_251, + assign_252, + assign_254, + assign_256, + assign_257, + assign_258, + assign_259, + assign_26, + assign_260, + assign_261, + assign_262, + assign_264, + assign_265, + assign_267, + assign_268, + assign_269, + assign_28, + assign_3, + assign_30, + assign_31, + assign_32, + assign_33, + assign_34, + assign_35, + assign_36, + assign_38, + assign_39, + assign_4, + assign_41, + assign_42, + assign_43, + assign_44, + assign_45, + assign_46, + assign_47, + assign_48, + assign_49, + assign_5, + assign_50, + assign_51, + assign_52, + assign_54, + assign_55, + assign_56, + assign_57, + assign_58, + assign_59, + assign_6, + assign_61, + assign_63, + assign_64, + assign_65, + assign_66, + assign_67, + assign_68, + assign_69, + assign_7, + assign_71, + assign_72, + assign_74, + assign_75, + assign_76, + assign_77, + assign_78, + assign_79, + assign_8, + assign_81, + assign_83, + assign_84, + assign_85, + assign_86, + assign_87, + assign_88, + assign_89, + assign_91, + assign_92, + assign_94, + assign_95, + assign_96, + assign_97, + assign_98, + assign_99, + full_int_array_0, + full_int_array_1, + full_int_array_2, + full_int_array_3, + full_int_array_4, + full_int_array_5, + full_int_array_6, + full_int_array_7, + full_int_array_8, + parameter_0, + ) + + return ( + conv2d_0, + reshape_0, + add_0, + transpose_0, + layer_norm_0, + layer_norm_1, + layer_norm_2, + layer_norm_3, + layer_norm_4, + layer_norm_5, + reshape_1, + transpose_1, + reshape_2, + reshape_3, + matmul_0, + add_1, + transpose_2, + slice_0, + full_0, + scale_0, + transpose_3, + matmul_1, + reshape_4, + index_select_0, + transpose_4, + unsqueeze_0, + softmax_0, + transpose_5, + reshape_5, + matmul_2, + add_2, + reshape_6, + transpose_6, + reshape_7, + reshape_8, + add_3, + layer_norm_6, + layer_norm_7, + layer_norm_8, + matmul_3, + add_4, + gelu_0, + matmul_4, + add_5, + add_6, + layer_norm_9, + layer_norm_10, + layer_norm_11, + reshape_9, + roll_0, + transpose_7, + reshape_10, + reshape_11, + matmul_5, + add_7, + transpose_8, + slice_1, + assign_9, + scale_1, + transpose_9, + matmul_6, + reshape_12, + index_select_1, + transpose_10, + unsqueeze_1, + add_8, + reshape_13, + unsqueeze_2, + add_9, + softmax_1, + transpose_11, + reshape_14, + matmul_7, + add_10, + reshape_15, + transpose_12, + reshape_16, + roll_1, + reshape_17, + full_1, + floor_0, + divide_0, + multiply_0, + add_11, + layer_norm_12, + layer_norm_13, + layer_norm_14, + matmul_8, + add_12, + gelu_1, + matmul_9, + add_13, + assign_11, + floor_1, + divide_1, + multiply_1, + add_14, + reshape_18, + strided_slice_0, + strided_slice_1, + strided_slice_2, + strided_slice_3, + full_2, + concat_0, + reshape_19, + layer_norm_15, + layer_norm_16, + layer_norm_17, + matmul_10, + layer_norm_18, + layer_norm_19, + layer_norm_20, + reshape_20, + transpose_13, + reshape_21, + reshape_22, + matmul_11, + add_15, + transpose_14, + slice_2, + assign_27, + scale_2, + transpose_15, + matmul_12, + reshape_23, + index_select_2, + transpose_16, + unsqueeze_3, + softmax_2, + transpose_17, + reshape_24, + matmul_13, + add_16, + reshape_25, + transpose_18, + reshape_26, + reshape_27, + full_3, + floor_2, + divide_2, + multiply_2, + add_17, + layer_norm_21, + layer_norm_22, + layer_norm_23, + matmul_14, + add_18, + gelu_2, + matmul_15, + add_19, + assign_29, + floor_3, + divide_3, + multiply_3, + add_20, + layer_norm_24, + layer_norm_25, + layer_norm_26, + reshape_28, + roll_2, + transpose_19, + reshape_29, + reshape_30, + matmul_16, + add_21, + transpose_20, + slice_3, + assign_37, + scale_3, + transpose_21, + matmul_17, + reshape_31, + index_select_3, + transpose_22, + unsqueeze_4, + add_22, + reshape_32, + unsqueeze_5, + add_23, + softmax_3, + transpose_23, + reshape_33, + matmul_18, + add_24, + reshape_34, + transpose_24, + reshape_35, + roll_3, + reshape_36, + full_4, + floor_4, + divide_4, + multiply_4, + add_25, + layer_norm_27, + layer_norm_28, + layer_norm_29, + matmul_19, + add_26, + gelu_3, + matmul_20, + add_27, + assign_40, + floor_5, + divide_5, + multiply_5, + add_28, + reshape_37, + strided_slice_4, + strided_slice_5, + strided_slice_6, + strided_slice_7, + assign_53, + concat_1, + reshape_38, + layer_norm_30, + layer_norm_31, + layer_norm_32, + matmul_21, + layer_norm_33, + layer_norm_34, + layer_norm_35, + reshape_39, + transpose_25, + reshape_40, + reshape_41, + matmul_22, + add_29, + transpose_26, + slice_4, + assign_60, + scale_4, + transpose_27, + matmul_23, + reshape_42, + index_select_4, + transpose_28, + unsqueeze_6, + softmax_4, + transpose_29, + reshape_43, + matmul_24, + add_30, + reshape_44, + transpose_30, + reshape_45, + reshape_46, + full_5, + floor_6, + divide_6, + multiply_6, + add_31, + layer_norm_36, + layer_norm_37, + layer_norm_38, + matmul_25, + add_32, + gelu_4, + matmul_26, + add_33, + assign_62, + floor_7, + divide_7, + multiply_7, + add_34, + layer_norm_39, + layer_norm_40, + layer_norm_41, + reshape_47, + roll_4, + transpose_31, + reshape_48, + reshape_49, + matmul_27, + add_35, + transpose_32, + slice_5, + assign_70, + scale_5, + transpose_33, + matmul_28, + reshape_50, + index_select_5, + transpose_34, + unsqueeze_7, + add_36, + reshape_51, + unsqueeze_8, + add_37, + softmax_5, + transpose_35, + reshape_52, + matmul_29, + add_38, + reshape_53, + transpose_36, + reshape_54, + roll_5, + reshape_55, + full_6, + floor_8, + divide_8, + multiply_8, + add_39, + layer_norm_42, + layer_norm_43, + layer_norm_44, + matmul_30, + add_40, + gelu_5, + matmul_31, + add_41, + assign_73, + floor_9, + divide_9, + multiply_9, + add_42, + layer_norm_45, + layer_norm_46, + layer_norm_47, + reshape_56, + transpose_37, + reshape_57, + reshape_58, + matmul_32, + add_43, + transpose_38, + slice_6, + assign_80, + scale_6, + transpose_39, + matmul_33, + reshape_59, + index_select_6, + transpose_40, + unsqueeze_9, + softmax_6, + transpose_41, + reshape_60, + matmul_34, + add_44, + reshape_61, + transpose_42, + reshape_62, + reshape_63, + full_7, + floor_10, + divide_10, + multiply_10, + add_45, + layer_norm_48, + layer_norm_49, + layer_norm_50, + matmul_35, + add_46, + gelu_6, + matmul_36, + add_47, + assign_82, + floor_11, + divide_11, + multiply_11, + add_48, + layer_norm_51, + layer_norm_52, + layer_norm_53, + reshape_64, + roll_6, + transpose_43, + reshape_65, + reshape_66, + matmul_37, + add_49, + transpose_44, + slice_7, + assign_90, + scale_7, + transpose_45, + matmul_38, + reshape_67, + index_select_7, + transpose_46, + unsqueeze_10, + add_50, + reshape_68, + unsqueeze_11, + add_51, + softmax_7, + transpose_47, + reshape_69, + matmul_39, + add_52, + reshape_70, + transpose_48, + reshape_71, + roll_7, + reshape_72, + full_8, + floor_12, + divide_12, + multiply_12, + add_53, + layer_norm_54, + layer_norm_55, + layer_norm_56, + matmul_40, + add_54, + gelu_7, + matmul_41, + add_55, + assign_93, + floor_13, + divide_13, + multiply_13, + add_56, + layer_norm_57, + layer_norm_58, + layer_norm_59, + reshape_73, + transpose_49, + reshape_74, + reshape_75, + matmul_42, + add_57, + transpose_50, + slice_8, + assign_100, + scale_8, + transpose_51, + matmul_43, + reshape_76, + index_select_8, + transpose_52, + unsqueeze_12, + softmax_8, + transpose_53, + reshape_77, + matmul_44, + add_58, + reshape_78, + transpose_54, + reshape_79, + reshape_80, + full_9, + floor_14, + divide_14, + multiply_14, + add_59, + layer_norm_60, + layer_norm_61, + layer_norm_62, + matmul_45, + add_60, + gelu_8, + matmul_46, + add_61, + assign_102, + floor_15, + divide_15, + multiply_15, + add_62, + layer_norm_63, + layer_norm_64, + layer_norm_65, + reshape_81, + roll_8, + transpose_55, + reshape_82, + reshape_83, + matmul_47, + add_63, + transpose_56, + slice_9, + assign_110, + scale_9, + transpose_57, + matmul_48, + reshape_84, + index_select_9, + transpose_58, + unsqueeze_13, + add_64, + reshape_85, + unsqueeze_14, + add_65, + softmax_9, + transpose_59, + reshape_86, + matmul_49, + add_66, + reshape_87, + transpose_60, + reshape_88, + roll_9, + reshape_89, + full_10, + floor_16, + divide_16, + multiply_16, + add_67, + layer_norm_66, + layer_norm_67, + layer_norm_68, + matmul_50, + add_68, + gelu_9, + matmul_51, + add_69, + assign_113, + floor_17, + divide_17, + multiply_17, + add_70, + layer_norm_69, + layer_norm_70, + layer_norm_71, + reshape_90, + transpose_61, + reshape_91, + reshape_92, + matmul_52, + add_71, + transpose_62, + slice_10, + assign_120, + scale_10, + transpose_63, + matmul_53, + reshape_93, + index_select_10, + transpose_64, + unsqueeze_15, + softmax_10, + transpose_65, + reshape_94, + matmul_54, + add_72, + reshape_95, + transpose_66, + reshape_96, + reshape_97, + full_11, + floor_18, + divide_18, + multiply_18, + add_73, + layer_norm_72, + layer_norm_73, + layer_norm_74, + matmul_55, + add_74, + gelu_10, + matmul_56, + add_75, + assign_122, + floor_19, + divide_19, + multiply_19, + add_76, + layer_norm_75, + layer_norm_76, + layer_norm_77, + reshape_98, + roll_10, + transpose_67, + reshape_99, + reshape_100, + matmul_57, + add_77, + transpose_68, + slice_11, + assign_130, + scale_11, + transpose_69, + matmul_58, + reshape_101, + index_select_11, + transpose_70, + unsqueeze_16, + add_78, + reshape_102, + unsqueeze_17, + add_79, + softmax_11, + transpose_71, + reshape_103, + matmul_59, + add_80, + reshape_104, + transpose_72, + reshape_105, + roll_11, + reshape_106, + full_12, + floor_20, + divide_20, + multiply_20, + add_81, + layer_norm_78, + layer_norm_79, + layer_norm_80, + matmul_60, + add_82, + gelu_11, + matmul_61, + add_83, + assign_133, + floor_21, + divide_21, + multiply_21, + add_84, + layer_norm_81, + layer_norm_82, + layer_norm_83, + reshape_107, + transpose_73, + reshape_108, + reshape_109, + matmul_62, + add_85, + transpose_74, + slice_12, + assign_140, + scale_12, + transpose_75, + matmul_63, + reshape_110, + index_select_12, + transpose_76, + unsqueeze_18, + softmax_12, + transpose_77, + reshape_111, + matmul_64, + add_86, + reshape_112, + transpose_78, + reshape_113, + reshape_114, + full_13, + floor_22, + divide_22, + multiply_22, + add_87, + layer_norm_84, + layer_norm_85, + layer_norm_86, + matmul_65, + add_88, + gelu_12, + matmul_66, + add_89, + assign_142, + floor_23, + divide_23, + multiply_23, + add_90, + layer_norm_87, + layer_norm_88, + layer_norm_89, + reshape_115, + roll_12, + transpose_79, + reshape_116, + reshape_117, + matmul_67, + add_91, + transpose_80, + slice_13, + assign_150, + scale_13, + transpose_81, + matmul_68, + reshape_118, + index_select_13, + transpose_82, + unsqueeze_19, + add_92, + reshape_119, + unsqueeze_20, + add_93, + softmax_13, + transpose_83, + reshape_120, + matmul_69, + add_94, + reshape_121, + transpose_84, + reshape_122, + roll_13, + reshape_123, + full_14, + floor_24, + divide_24, + multiply_24, + add_95, + layer_norm_90, + layer_norm_91, + layer_norm_92, + matmul_70, + add_96, + gelu_13, + matmul_71, + add_97, + assign_153, + floor_25, + divide_25, + multiply_25, + add_98, + layer_norm_93, + layer_norm_94, + layer_norm_95, + reshape_124, + transpose_85, + reshape_125, + reshape_126, + matmul_72, + add_99, + transpose_86, + slice_14, + assign_160, + scale_14, + transpose_87, + matmul_73, + reshape_127, + index_select_14, + transpose_88, + unsqueeze_21, + softmax_14, + transpose_89, + reshape_128, + matmul_74, + add_100, + reshape_129, + transpose_90, + reshape_130, + reshape_131, + full_15, + floor_26, + divide_26, + multiply_26, + add_101, + layer_norm_96, + layer_norm_97, + layer_norm_98, + matmul_75, + add_102, + gelu_14, + matmul_76, + add_103, + assign_162, + floor_27, + divide_27, + multiply_27, + add_104, + layer_norm_99, + layer_norm_100, + layer_norm_101, + reshape_132, + roll_14, + transpose_91, + reshape_133, + reshape_134, + matmul_77, + add_105, + transpose_92, + slice_15, + assign_170, + scale_15, + transpose_93, + matmul_78, + reshape_135, + index_select_15, + transpose_94, + unsqueeze_22, + add_106, + reshape_136, + unsqueeze_23, + add_107, + softmax_15, + transpose_95, + reshape_137, + matmul_79, + add_108, + reshape_138, + transpose_96, + reshape_139, + roll_15, + reshape_140, + full_16, + floor_28, + divide_28, + multiply_28, + add_109, + layer_norm_102, + layer_norm_103, + layer_norm_104, + matmul_80, + add_110, + gelu_15, + matmul_81, + add_111, + assign_173, + floor_29, + divide_29, + multiply_29, + add_112, + layer_norm_105, + layer_norm_106, + layer_norm_107, + reshape_141, + transpose_97, + reshape_142, + reshape_143, + matmul_82, + add_113, + transpose_98, + slice_16, + assign_180, + scale_16, + transpose_99, + matmul_83, + reshape_144, + index_select_16, + transpose_100, + unsqueeze_24, + softmax_16, + transpose_101, + reshape_145, + matmul_84, + add_114, + reshape_146, + transpose_102, + reshape_147, + reshape_148, + full_17, + floor_30, + divide_30, + multiply_30, + add_115, + layer_norm_108, + layer_norm_109, + layer_norm_110, + matmul_85, + add_116, + gelu_16, + matmul_86, + add_117, + assign_182, + floor_31, + divide_31, + multiply_31, + add_118, + layer_norm_111, + layer_norm_112, + layer_norm_113, + reshape_149, + roll_16, + transpose_103, + reshape_150, + reshape_151, + matmul_87, + add_119, + transpose_104, + slice_17, + assign_190, + scale_17, + transpose_105, + matmul_88, + reshape_152, + index_select_17, + transpose_106, + unsqueeze_25, + add_120, + reshape_153, + unsqueeze_26, + add_121, + softmax_17, + transpose_107, + reshape_154, + matmul_89, + add_122, + reshape_155, + transpose_108, + reshape_156, + roll_17, + reshape_157, + full_18, + floor_32, + divide_32, + multiply_32, + add_123, + layer_norm_114, + layer_norm_115, + layer_norm_116, + matmul_90, + add_124, + gelu_17, + matmul_91, + add_125, + assign_193, + floor_33, + divide_33, + multiply_33, + add_126, + layer_norm_117, + layer_norm_118, + layer_norm_119, + reshape_158, + transpose_109, + reshape_159, + reshape_160, + matmul_92, + add_127, + transpose_110, + slice_18, + assign_200, + scale_18, + transpose_111, + matmul_93, + reshape_161, + index_select_18, + transpose_112, + unsqueeze_27, + softmax_18, + transpose_113, + reshape_162, + matmul_94, + add_128, + reshape_163, + transpose_114, + reshape_164, + reshape_165, + full_19, + floor_34, + divide_34, + multiply_34, + add_129, + layer_norm_120, + layer_norm_121, + layer_norm_122, + matmul_95, + add_130, + gelu_18, + matmul_96, + add_131, + assign_202, + floor_35, + divide_35, + multiply_35, + add_132, + layer_norm_123, + layer_norm_124, + layer_norm_125, + reshape_166, + roll_18, + transpose_115, + reshape_167, + reshape_168, + matmul_97, + add_133, + transpose_116, + slice_19, + assign_210, + scale_19, + transpose_117, + matmul_98, + reshape_169, + index_select_19, + transpose_118, + unsqueeze_28, + add_134, + reshape_170, + unsqueeze_29, + add_135, + softmax_19, + transpose_119, + reshape_171, + matmul_99, + add_136, + reshape_172, + transpose_120, + reshape_173, + roll_19, + reshape_174, + full_20, + floor_36, + divide_36, + multiply_36, + add_137, + layer_norm_126, + layer_norm_127, + layer_norm_128, + matmul_100, + add_138, + gelu_19, + matmul_101, + add_139, + assign_213, + floor_37, + divide_37, + multiply_37, + add_140, + layer_norm_129, + layer_norm_130, + layer_norm_131, + reshape_175, + transpose_121, + reshape_176, + reshape_177, + matmul_102, + add_141, + transpose_122, + slice_20, + assign_220, + scale_20, + transpose_123, + matmul_103, + reshape_178, + index_select_20, + transpose_124, + unsqueeze_30, + softmax_20, + transpose_125, + reshape_179, + matmul_104, + add_142, + reshape_180, + transpose_126, + reshape_181, + reshape_182, + full_21, + floor_38, + divide_38, + multiply_38, + add_143, + layer_norm_132, + layer_norm_133, + layer_norm_134, + matmul_105, + add_144, + gelu_20, + matmul_106, + add_145, + assign_222, + floor_39, + divide_39, + multiply_39, + add_146, + layer_norm_135, + layer_norm_136, + layer_norm_137, + reshape_183, + roll_20, + transpose_127, + reshape_184, + reshape_185, + matmul_107, + add_147, + transpose_128, + slice_21, + assign_230, + scale_21, + transpose_129, + matmul_108, + reshape_186, + index_select_21, + transpose_130, + unsqueeze_31, + add_148, + reshape_187, + unsqueeze_32, + add_149, + softmax_21, + transpose_131, + reshape_188, + matmul_109, + add_150, + reshape_189, + transpose_132, + reshape_190, + roll_21, + reshape_191, + full_22, + floor_40, + divide_40, + multiply_40, + add_151, + layer_norm_138, + layer_norm_139, + layer_norm_140, + matmul_110, + add_152, + gelu_21, + matmul_111, + add_153, + assign_233, + floor_41, + divide_41, + multiply_41, + add_154, + reshape_192, + strided_slice_8, + strided_slice_9, + strided_slice_10, + strided_slice_11, + assign_246, + concat_2, + reshape_193, + layer_norm_141, + layer_norm_142, + layer_norm_143, + matmul_112, + layer_norm_144, + layer_norm_145, + layer_norm_146, + reshape_194, + transpose_133, + reshape_195, + reshape_196, + matmul_113, + add_155, + transpose_134, + slice_22, + assign_253, + scale_22, + transpose_135, + matmul_114, + reshape_197, + index_select_22, + transpose_136, + unsqueeze_33, + softmax_22, + transpose_137, + reshape_198, + matmul_115, + add_156, + reshape_199, + transpose_138, + reshape_200, + reshape_201, + full_23, + floor_42, + divide_42, + multiply_42, + add_157, + layer_norm_147, + layer_norm_148, + layer_norm_149, + matmul_116, + add_158, + gelu_22, + matmul_117, + add_159, + assign_255, + floor_43, + divide_43, + multiply_43, + add_160, + layer_norm_150, + layer_norm_151, + layer_norm_152, + reshape_202, + roll_22, + transpose_139, + reshape_203, + reshape_204, + matmul_118, + add_161, + transpose_140, + slice_23, + assign_263, + scale_23, + transpose_141, + matmul_119, + reshape_205, + index_select_23, + transpose_142, + unsqueeze_34, + add_162, + reshape_206, + unsqueeze_35, + add_163, + softmax_23, + transpose_143, + reshape_207, + matmul_120, + add_164, + reshape_208, + transpose_144, + reshape_209, + roll_23, + reshape_210, + full_24, + floor_44, + divide_44, + multiply_44, + add_165, + layer_norm_153, + layer_norm_154, + layer_norm_155, + matmul_121, + add_166, + gelu_23, + matmul_122, + add_167, + assign_266, + floor_45, + divide_45, + multiply_45, + add_168, + layer_norm_156, + layer_norm_157, + transpose_145, + unsqueeze_36, + pool2d_0, + squeeze_0, + flatten_0, + matmul_123, + add_169, + set_value__0, + set_value__1, + set_value__2, + set_value__3, + set_value__4, + set_value__5, + set_value__6, + set_value__7, + set_value__8, + set_value__9, + set_value__10, + set_value__11, + ) diff --git a/paddle_samples/vision-model/SwinTransformer_large_patch4_window12_384/subgraph_0/weight_meta.py b/paddle_samples/vision-model/SwinTransformer_large_patch4_window12_384/subgraph_0/weight_meta.py new file mode 100644 index 00000000..88a6a1a4 --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_large_patch4_window12_384/subgraph_0/weight_meta.py @@ -0,0 +1,2743 @@ +class Program_weight_tensor_parameter_0: + name = "parameter_0" + shape = [102] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_1: + name = "parameter_1" + shape = [1536, 102] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_2: + name = "parameter_2" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_3: + name = "parameter_3" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_4: + name = "parameter_4" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_5: + name = "parameter_5" + shape = [6144, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_6: + name = "parameter_6" + shape = [6144] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_7: + name = "parameter_7" + shape = [1536, 6144] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_8: + name = "parameter_8" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_9: + name = "parameter_9" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_10: + name = "parameter_10" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_11: + name = "parameter_11" + shape = [1536, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_12: + name = "parameter_12" + shape = [4608] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_13: + name = "parameter_13" + shape = [1536, 4608] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_14: + name = "parameter_14" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_15: + name = "parameter_15" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_16: + name = "parameter_16" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_17: + name = "parameter_17" + shape = [6144, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_18: + name = "parameter_18" + shape = [6144] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_19: + name = "parameter_19" + shape = [1536, 6144] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_20: + name = "parameter_20" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_21: + name = "parameter_21" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_22: + name = "parameter_22" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_23: + name = "parameter_23" + shape = [1536, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_24: + name = "parameter_24" + shape = [4608] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_25: + name = "parameter_25" + shape = [1536, 4608] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_26: + name = "parameter_26" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_27: + name = "parameter_27" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_28: + name = "parameter_28" + shape = [3072, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_29: + name = "parameter_29" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_30: + name = "parameter_30" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_31: + name = "parameter_31" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_32: + name = "parameter_32" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_33: + name = "parameter_33" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_34: + name = "parameter_34" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_35: + name = "parameter_35" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_36: + name = "parameter_36" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_37: + name = "parameter_37" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_38: + name = "parameter_38" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_39: + name = "parameter_39" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_40: + name = "parameter_40" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_41: + name = "parameter_41" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_42: + name = "parameter_42" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_43: + name = "parameter_43" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_44: + name = "parameter_44" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_45: + name = "parameter_45" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_46: + name = "parameter_46" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_47: + name = "parameter_47" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_48: + name = "parameter_48" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_49: + name = "parameter_49" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_50: + name = "parameter_50" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_51: + name = "parameter_51" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_52: + name = "parameter_52" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_53: + name = "parameter_53" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_54: + name = "parameter_54" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_55: + name = "parameter_55" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_56: + name = "parameter_56" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_57: + name = "parameter_57" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_58: + name = "parameter_58" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_59: + name = "parameter_59" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_60: + name = "parameter_60" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_61: + name = "parameter_61" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_62: + name = "parameter_62" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_63: + name = "parameter_63" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_64: + name = "parameter_64" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_65: + name = "parameter_65" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_66: + name = "parameter_66" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_67: + name = "parameter_67" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_68: + name = "parameter_68" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_69: + name = "parameter_69" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_70: + name = "parameter_70" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_71: + name = "parameter_71" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_72: + name = "parameter_72" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_73: + name = "parameter_73" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_74: + name = "parameter_74" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_75: + name = "parameter_75" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_76: + name = "parameter_76" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_77: + name = "parameter_77" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_78: + name = "parameter_78" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_79: + name = "parameter_79" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_80: + name = "parameter_80" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_81: + name = "parameter_81" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_82: + name = "parameter_82" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_83: + name = "parameter_83" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_84: + name = "parameter_84" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_85: + name = "parameter_85" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_86: + name = "parameter_86" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_87: + name = "parameter_87" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_88: + name = "parameter_88" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_89: + name = "parameter_89" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_90: + name = "parameter_90" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_91: + name = "parameter_91" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_92: + name = "parameter_92" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_93: + name = "parameter_93" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_94: + name = "parameter_94" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_95: + name = "parameter_95" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_96: + name = "parameter_96" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_97: + name = "parameter_97" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_98: + name = "parameter_98" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_99: + name = "parameter_99" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_100: + name = "parameter_100" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_101: + name = "parameter_101" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_102: + name = "parameter_102" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_103: + name = "parameter_103" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_104: + name = "parameter_104" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_105: + name = "parameter_105" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_106: + name = "parameter_106" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_107: + name = "parameter_107" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_108: + name = "parameter_108" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_109: + name = "parameter_109" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_110: + name = "parameter_110" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_111: + name = "parameter_111" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_112: + name = "parameter_112" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_113: + name = "parameter_113" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_114: + name = "parameter_114" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_115: + name = "parameter_115" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_116: + name = "parameter_116" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_117: + name = "parameter_117" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_118: + name = "parameter_118" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_119: + name = "parameter_119" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_120: + name = "parameter_120" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_121: + name = "parameter_121" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_122: + name = "parameter_122" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_123: + name = "parameter_123" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_124: + name = "parameter_124" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_125: + name = "parameter_125" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_126: + name = "parameter_126" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_127: + name = "parameter_127" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_128: + name = "parameter_128" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_129: + name = "parameter_129" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_130: + name = "parameter_130" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_131: + name = "parameter_131" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_132: + name = "parameter_132" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_133: + name = "parameter_133" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_134: + name = "parameter_134" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_135: + name = "parameter_135" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_136: + name = "parameter_136" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_137: + name = "parameter_137" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_138: + name = "parameter_138" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_139: + name = "parameter_139" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_140: + name = "parameter_140" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_141: + name = "parameter_141" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_142: + name = "parameter_142" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_143: + name = "parameter_143" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_144: + name = "parameter_144" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_145: + name = "parameter_145" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_146: + name = "parameter_146" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_147: + name = "parameter_147" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_148: + name = "parameter_148" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_149: + name = "parameter_149" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_150: + name = "parameter_150" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_151: + name = "parameter_151" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_152: + name = "parameter_152" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_153: + name = "parameter_153" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_154: + name = "parameter_154" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_155: + name = "parameter_155" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_156: + name = "parameter_156" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_157: + name = "parameter_157" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_158: + name = "parameter_158" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_159: + name = "parameter_159" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_160: + name = "parameter_160" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_161: + name = "parameter_161" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_162: + name = "parameter_162" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_163: + name = "parameter_163" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_164: + name = "parameter_164" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_165: + name = "parameter_165" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_166: + name = "parameter_166" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_167: + name = "parameter_167" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_168: + name = "parameter_168" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_169: + name = "parameter_169" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_170: + name = "parameter_170" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_171: + name = "parameter_171" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_172: + name = "parameter_172" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_173: + name = "parameter_173" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_174: + name = "parameter_174" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_175: + name = "parameter_175" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_176: + name = "parameter_176" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_177: + name = "parameter_177" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_178: + name = "parameter_178" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_179: + name = "parameter_179" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_180: + name = "parameter_180" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_181: + name = "parameter_181" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_182: + name = "parameter_182" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_183: + name = "parameter_183" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_184: + name = "parameter_184" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_185: + name = "parameter_185" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_186: + name = "parameter_186" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_187: + name = "parameter_187" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_188: + name = "parameter_188" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_189: + name = "parameter_189" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_190: + name = "parameter_190" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_191: + name = "parameter_191" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_192: + name = "parameter_192" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_193: + name = "parameter_193" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_194: + name = "parameter_194" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_195: + name = "parameter_195" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_196: + name = "parameter_196" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_197: + name = "parameter_197" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_198: + name = "parameter_198" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_199: + name = "parameter_199" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_200: + name = "parameter_200" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_201: + name = "parameter_201" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_202: + name = "parameter_202" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_203: + name = "parameter_203" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_204: + name = "parameter_204" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_205: + name = "parameter_205" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_206: + name = "parameter_206" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_207: + name = "parameter_207" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_208: + name = "parameter_208" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_209: + name = "parameter_209" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_210: + name = "parameter_210" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_211: + name = "parameter_211" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_212: + name = "parameter_212" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_213: + name = "parameter_213" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_214: + name = "parameter_214" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_215: + name = "parameter_215" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_216: + name = "parameter_216" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_217: + name = "parameter_217" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_218: + name = "parameter_218" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_219: + name = "parameter_219" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_220: + name = "parameter_220" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_221: + name = "parameter_221" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_222: + name = "parameter_222" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_223: + name = "parameter_223" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_224: + name = "parameter_224" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_225: + name = "parameter_225" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_226: + name = "parameter_226" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_227: + name = "parameter_227" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_228: + name = "parameter_228" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_229: + name = "parameter_229" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_230: + name = "parameter_230" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_231: + name = "parameter_231" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_232: + name = "parameter_232" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_233: + name = "parameter_233" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_234: + name = "parameter_234" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_235: + name = "parameter_235" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_236: + name = "parameter_236" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_237: + name = "parameter_237" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_238: + name = "parameter_238" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_239: + name = "parameter_239" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_240: + name = "parameter_240" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_241: + name = "parameter_241" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_242: + name = "parameter_242" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_243: + name = "parameter_243" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_244: + name = "parameter_244" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_245: + name = "parameter_245" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_246: + name = "parameter_246" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_247: + name = "parameter_247" + shape = [1536, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_248: + name = "parameter_248" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_249: + name = "parameter_249" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_250: + name = "parameter_250" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_251: + name = "parameter_251" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_252: + name = "parameter_252" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_253: + name = "parameter_253" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_254: + name = "parameter_254" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_255: + name = "parameter_255" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_256: + name = "parameter_256" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_257: + name = "parameter_257" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_258: + name = "parameter_258" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_259: + name = "parameter_259" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_260: + name = "parameter_260" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_261: + name = "parameter_261" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_262: + name = "parameter_262" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_263: + name = "parameter_263" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_264: + name = "parameter_264" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_265: + name = "parameter_265" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_266: + name = "parameter_266" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_267: + name = "parameter_267" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_268: + name = "parameter_268" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_269: + name = "parameter_269" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_270: + name = "parameter_270" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_271: + name = "parameter_271" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_272: + name = "parameter_272" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_273: + name = "parameter_273" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_274: + name = "parameter_274" + shape = [768, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_275: + name = "parameter_275" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_276: + name = "parameter_276" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_277: + name = "parameter_277" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_278: + name = "parameter_278" + shape = [768, 192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_279: + name = "parameter_279" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_280: + name = "parameter_280" + shape = [192, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_281: + name = "parameter_281" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_282: + name = "parameter_282" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_283: + name = "parameter_283" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_284: + name = "parameter_284" + shape = [192, 192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_285: + name = "parameter_285" + shape = [576] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_286: + name = "parameter_286" + shape = [192, 576] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_287: + name = "parameter_287" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_288: + name = "parameter_288" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_289: + name = "parameter_289" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_290: + name = "parameter_290" + shape = [768, 192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_291: + name = "parameter_291" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_292: + name = "parameter_292" + shape = [192, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_293: + name = "parameter_293" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_294: + name = "parameter_294" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_295: + name = "parameter_295" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_296: + name = "parameter_296" + shape = [192, 192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_297: + name = "parameter_297" + shape = [576] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_298: + name = "parameter_298" + shape = [192, 576] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_299: + name = "parameter_299" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_300: + name = "parameter_300" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_301: + name = "parameter_301" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_302: + name = "parameter_302" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_303: + name = "parameter_303" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_304: + name = "parameter_304" + shape = [192, 3, 4, 4] + dtype = "float32" + low = 0 + high = 0.5 + data = None diff --git a/paddle_samples/vision-model/SwinTransformer_large_patch4_window12_384/subgraph_1/graph_hash.txt b/paddle_samples/vision-model/SwinTransformer_large_patch4_window12_384/subgraph_1/graph_hash.txt new file mode 100644 index 00000000..678a18a1 --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_large_patch4_window12_384/subgraph_1/graph_hash.txt @@ -0,0 +1 @@ +b30ca9883649d991bc8c4ba04da935a9b4b73a36613bcc8529ef9a0a2fb8b1d3 \ No newline at end of file diff --git a/paddle_samples/vision-model/SwinTransformer_large_patch4_window12_384/subgraph_1/graph_net.json b/paddle_samples/vision-model/SwinTransformer_large_patch4_window12_384/subgraph_1/graph_net.json new file mode 100644 index 00000000..368ac6fc --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_large_patch4_window12_384/subgraph_1/graph_net.json @@ -0,0 +1,5 @@ +{ + "framework": "paddle", + "num_devices_required": 1, + "num_nodes_required": 1 +} \ No newline at end of file diff --git a/paddle_samples/vision-model/SwinTransformer_large_patch4_window12_384/subgraph_1/input_meta.py b/paddle_samples/vision-model/SwinTransformer_large_patch4_window12_384/subgraph_1/input_meta.py new file mode 100644 index 00000000..44ed35da --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_large_patch4_window12_384/subgraph_1/input_meta.py @@ -0,0 +1,439 @@ +class Program_weight_tensor_data_0: + name = "data_0" + shape = [32, 3, 384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_1: + name = "data_1" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_2: + name = "data_2" + shape = [529, 6] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_3: + name = "data_3" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_4: + name = "data_4" + shape = [529, 6] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_5: + name = "data_5" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_6: + name = "data_6" + shape = [529, 12] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_7: + name = "data_7" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_8: + name = "data_8" + shape = [529, 12] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_9: + name = "data_9" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_10: + name = "data_10" + shape = [529, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_11: + name = "data_11" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_12: + name = "data_12" + shape = [529, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_13: + name = "data_13" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_14: + name = "data_14" + shape = [529, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_15: + name = "data_15" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_16: + name = "data_16" + shape = [529, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_17: + name = "data_17" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_18: + name = "data_18" + shape = [529, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_19: + name = "data_19" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_20: + name = "data_20" + shape = [529, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_21: + name = "data_21" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_22: + name = "data_22" + shape = [529, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_23: + name = "data_23" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_24: + name = "data_24" + shape = [529, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_25: + name = "data_25" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_26: + name = "data_26" + shape = [529, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_27: + name = "data_27" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_28: + name = "data_28" + shape = [529, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_29: + name = "data_29" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_30: + name = "data_30" + shape = [529, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_31: + name = "data_31" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_32: + name = "data_32" + shape = [529, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_33: + name = "data_33" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_34: + name = "data_34" + shape = [529, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_35: + name = "data_35" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_36: + name = "data_36" + shape = [529, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_37: + name = "data_37" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_38: + name = "data_38" + shape = [529, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_39: + name = "data_39" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_40: + name = "data_40" + shape = [529, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_41: + name = "data_41" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_42: + name = "data_42" + shape = [529, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_43: + name = "data_43" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_44: + name = "data_44" + shape = [529, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_45: + name = "data_45" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_46: + name = "data_46" + shape = [529, 48] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_47: + name = "data_47" + shape = [144, 144] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_48: + name = "data_48" + shape = [529, 48] + dtype = "float32" + low = 0 + high = 0.5 + data = None diff --git a/paddle_samples/vision-model/SwinTransformer_large_patch4_window12_384/subgraph_1/model.py b/paddle_samples/vision-model/SwinTransformer_large_patch4_window12_384/subgraph_1/model.py new file mode 100644 index 00000000..b94e03bc --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_large_patch4_window12_384/subgraph_1/model.py @@ -0,0 +1,9751 @@ +import paddle + + +class GraphModule(paddle.nn.Layer): + def __init__(self): + super().__init__() + + def forward( + self, + parameter_0, + parameter_1, + parameter_2, + parameter_3, + parameter_4, + parameter_5, + parameter_6, + parameter_7, + parameter_8, + parameter_9, + parameter_10, + parameter_11, + parameter_12, + parameter_13, + parameter_14, + parameter_15, + parameter_16, + parameter_17, + parameter_18, + parameter_19, + parameter_20, + parameter_21, + parameter_22, + parameter_23, + parameter_24, + parameter_25, + parameter_26, + parameter_27, + parameter_28, + parameter_29, + parameter_30, + parameter_31, + parameter_32, + parameter_33, + parameter_34, + parameter_35, + parameter_36, + parameter_37, + parameter_38, + parameter_39, + parameter_40, + parameter_41, + parameter_42, + parameter_43, + parameter_44, + parameter_45, + parameter_46, + parameter_47, + parameter_48, + parameter_49, + parameter_50, + parameter_51, + parameter_52, + parameter_53, + parameter_54, + parameter_55, + parameter_56, + parameter_57, + parameter_58, + parameter_59, + parameter_60, + parameter_61, + parameter_62, + parameter_63, + parameter_64, + parameter_65, + parameter_66, + parameter_67, + parameter_68, + parameter_69, + parameter_70, + parameter_71, + parameter_72, + parameter_73, + parameter_74, + parameter_75, + parameter_76, + parameter_77, + parameter_78, + parameter_79, + parameter_80, + parameter_81, + parameter_82, + parameter_83, + parameter_84, + parameter_85, + parameter_86, + parameter_87, + parameter_88, + parameter_89, + parameter_90, + parameter_91, + parameter_92, + parameter_93, + parameter_94, + parameter_95, + parameter_96, + parameter_97, + parameter_98, + parameter_99, + parameter_100, + parameter_101, + parameter_102, + parameter_103, + parameter_104, + parameter_105, + parameter_106, + parameter_107, + parameter_108, + parameter_109, + parameter_110, + parameter_111, + parameter_112, + parameter_113, + parameter_114, + parameter_115, + parameter_116, + parameter_117, + parameter_118, + parameter_119, + parameter_120, + parameter_121, + parameter_122, + parameter_123, + parameter_124, + parameter_125, + parameter_126, + parameter_127, + parameter_128, + parameter_129, + parameter_130, + parameter_131, + parameter_132, + parameter_133, + parameter_134, + parameter_135, + parameter_136, + parameter_137, + parameter_138, + parameter_139, + parameter_140, + parameter_141, + parameter_142, + parameter_143, + parameter_144, + parameter_145, + parameter_146, + parameter_147, + parameter_148, + parameter_149, + parameter_150, + parameter_151, + parameter_152, + parameter_153, + parameter_154, + parameter_155, + parameter_156, + parameter_157, + parameter_158, + parameter_159, + parameter_160, + parameter_161, + parameter_162, + parameter_163, + parameter_164, + parameter_165, + parameter_166, + parameter_167, + parameter_168, + parameter_169, + parameter_170, + parameter_171, + parameter_172, + parameter_173, + parameter_174, + parameter_175, + parameter_176, + parameter_177, + parameter_178, + parameter_179, + parameter_180, + parameter_181, + parameter_182, + parameter_183, + parameter_184, + parameter_185, + parameter_186, + parameter_187, + parameter_188, + parameter_189, + parameter_190, + parameter_191, + parameter_192, + parameter_193, + parameter_194, + parameter_195, + parameter_196, + parameter_197, + parameter_198, + parameter_199, + parameter_200, + parameter_201, + parameter_202, + parameter_203, + parameter_204, + parameter_205, + parameter_206, + parameter_207, + parameter_208, + parameter_209, + parameter_210, + parameter_211, + parameter_212, + parameter_213, + parameter_214, + parameter_215, + parameter_216, + parameter_217, + parameter_218, + parameter_219, + parameter_220, + parameter_221, + parameter_222, + parameter_223, + parameter_224, + parameter_225, + parameter_226, + parameter_227, + parameter_228, + parameter_229, + parameter_230, + parameter_231, + parameter_232, + parameter_233, + parameter_234, + parameter_235, + parameter_236, + parameter_237, + parameter_238, + parameter_239, + parameter_240, + parameter_241, + parameter_242, + parameter_243, + parameter_244, + parameter_245, + parameter_246, + parameter_247, + parameter_248, + parameter_249, + parameter_250, + parameter_251, + parameter_252, + parameter_253, + parameter_254, + parameter_255, + parameter_256, + parameter_257, + parameter_258, + parameter_259, + parameter_260, + parameter_261, + parameter_262, + parameter_263, + parameter_264, + parameter_265, + parameter_266, + parameter_267, + parameter_268, + parameter_269, + parameter_270, + parameter_271, + parameter_272, + parameter_273, + parameter_274, + parameter_275, + parameter_276, + parameter_277, + parameter_278, + parameter_279, + parameter_280, + parameter_281, + parameter_282, + parameter_283, + parameter_284, + parameter_285, + parameter_286, + parameter_287, + parameter_288, + parameter_289, + parameter_290, + parameter_291, + parameter_292, + parameter_293, + parameter_294, + parameter_295, + parameter_296, + parameter_297, + parameter_298, + parameter_299, + parameter_300, + parameter_301, + parameter_302, + parameter_303, + parameter_304, + data_0, + data_1, + data_2, + data_3, + data_4, + data_5, + data_6, + data_7, + data_8, + data_9, + data_10, + data_11, + data_12, + data_13, + data_14, + data_15, + data_16, + data_17, + data_18, + data_19, + data_20, + data_21, + data_22, + data_23, + data_24, + data_25, + data_26, + data_27, + data_28, + data_29, + data_30, + data_31, + data_32, + data_33, + data_34, + data_35, + data_36, + data_37, + data_38, + data_39, + data_40, + data_41, + data_42, + data_43, + data_44, + data_45, + data_46, + data_47, + data_48, + ): + # pd_op.shape64: (4xi64) <- (-1x3x384x384xf32) + shape64_0 = paddle._C_ops.shape64(data_0) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_0 = [0] + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_1 = [1] + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_0 = paddle._C_ops.slice( + shape64_0, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_0 + + # pd_op.conv2d: (-1x192x96x96xf32) <- (-1x3x384x384xf32, 192x3x4x4xf32) + conv2d_0 = paddle._C_ops.conv2d( + data_0, parameter_304, [4, 4], [0, 0], "EXPLICIT", [1, 1], 1, "NCHW" + ) + del data_0, parameter_304 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_2 = [1, -1, 1, 1] + + # pd_op.reshape: (1x192x1x1xf32) <- (192xf32, 4xi64) + reshape_0 = paddle._C_ops.reshape(parameter_303, full_int_array_2) + del full_int_array_2, parameter_303 + + # pd_op.add: (-1x192x96x96xf32) <- (-1x192x96x96xf32, 1x192x1x1xf32) + add_1 = paddle._C_ops.add(conv2d_0, reshape_0) + del conv2d_0, reshape_0 + + # pd_op.shape64: (4xi64) <- (-1x192x96x96xf32) + shape64_1 = paddle._C_ops.shape64(add_1) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_1 = paddle._C_ops.slice( + shape64_1, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_1 + + # pd_op.flatten: (-1x192x9216xf32) <- (-1x192x96x96xf32) + flatten_0 = paddle._C_ops.flatten(add_1, 2, 3) + del add_1 + + # pd_op.transpose: (-1x9216x192xf32) <- (-1x192x9216xf32) + transpose_0 = paddle._C_ops.transpose(flatten_0, [0, 2, 1]) + del flatten_0 + + # pd_op.layer_norm: (-1x9216x192xf32, -1x9216xf32, -1x9216xf32) <- (-1x9216x192xf32, 192xf32, 192xf32) + layer_norm_0, layer_norm_1, layer_norm_2 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + transpose_0, parameter_302, parameter_301, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_301, parameter_302, transpose_0 + + # pd_op.shape64: (3xi64) <- (-1x9216x192xf32) + shape64_2 = paddle._C_ops.shape64(layer_norm_0) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_2 = paddle._C_ops.slice( + shape64_2, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_2 + + # pd_op.layer_norm: (-1x9216x192xf32, -1x9216xf32, -1x9216xf32) <- (-1x9216x192xf32, 192xf32, 192xf32) + layer_norm_3, layer_norm_4, layer_norm_5 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + layer_norm_0, parameter_300, parameter_299, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_299, parameter_300 + + # pd_op.full: (xi64) <- () + full_0 = paddle._C_ops.full( + [], float("96"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_1 = paddle._C_ops.full( + [], float("192"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_0 = [slice_2, full_0, full_0, full_1] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_0 = paddle._C_ops.stack(combine_0, 0) + del combine_0 + + # pd_op.reshape: (-1x96x96x192xf32) <- (-1x9216x192xf32, 4xi64) + reshape_1 = paddle._C_ops.reshape(layer_norm_3, stack_0) + del layer_norm_3, stack_0 + + # pd_op.shape64: (4xi64) <- (-1x96x96x192xf32) + shape64_3 = paddle._C_ops.shape64(reshape_1) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_3 = paddle._C_ops.slice( + shape64_3, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_3 + + # pd_op.full: (xi64) <- () + full_2 = paddle._C_ops.full( + [], float("8"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_3 = paddle._C_ops.full( + [], float("12"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_1 = [slice_3, full_2, full_3, full_2, full_3, full_1] + del slice_3 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_1 = paddle._C_ops.stack(combine_1, 0) + del combine_1 + + # pd_op.reshape: (-1x8x12x8x12x192xf32) <- (-1x96x96x192xf32, 6xi64) + reshape_2 = paddle._C_ops.reshape(reshape_1, stack_1) + del reshape_1, stack_1 + + # pd_op.transpose: (-1x8x8x12x12x192xf32) <- (-1x8x12x8x12x192xf32) + transpose_1 = paddle._C_ops.transpose(reshape_2, [0, 1, 3, 2, 4, 5]) + del reshape_2 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_3 = [-1, 12, 12, 192] + + # pd_op.reshape: (-1x12x12x192xf32) <- (-1x8x8x12x12x192xf32, 4xi64) + reshape_3 = paddle._C_ops.reshape(transpose_1, full_int_array_3) + del transpose_1 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_4 = [-1, 144, 192] + + # pd_op.reshape: (-1x144x192xf32) <- (-1x12x12x192xf32, 3xi64) + reshape_4 = paddle._C_ops.reshape(reshape_3, full_int_array_4) + del reshape_3 + + # pd_op.shape64: (3xi64) <- (-1x144x192xf32) + shape64_4 = paddle._C_ops.shape64(reshape_4) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_4 = paddle._C_ops.slice( + shape64_4, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_4 + + # pd_op.matmul: (-1x144x576xf32) <- (-1x144x192xf32, 192x576xf32) + matmul_0 = paddle._C_ops.matmul(reshape_4, parameter_298, False, False) + del parameter_298, reshape_4 + + # pd_op.add: (-1x144x576xf32) <- (-1x144x576xf32, 576xf32) + add_2 = paddle._C_ops.add(matmul_0, parameter_297) + del matmul_0, parameter_297 + + # pd_op.full: (xi64) <- () + full_4 = paddle._C_ops.full( + [], float("144"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_5 = paddle._C_ops.full( + [], float("3"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_6 = paddle._C_ops.full( + [], float("6"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_7 = paddle._C_ops.full( + [], float("32"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_2 = [slice_4, full_4, full_5, full_6, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_2 = paddle._C_ops.stack(combine_2, 0) + del combine_2 + + # pd_op.reshape: (-1x144x3x6x32xf32) <- (-1x144x576xf32, 5xi64) + reshape_5 = paddle._C_ops.reshape(add_2, stack_2) + del add_2, stack_2 + + # pd_op.transpose: (3x-1x6x144x32xf32) <- (-1x144x3x6x32xf32) + transpose_2 = paddle._C_ops.transpose(reshape_5, [2, 0, 3, 1, 4]) + del reshape_5 + + # pd_op.slice: (-1x6x144x32xf32) <- (3x-1x6x144x32xf32, 1xi64, 1xi64) + slice_5 = paddle._C_ops.slice( + transpose_2, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_5 = [2] + + # pd_op.slice: (-1x6x144x32xf32) <- (3x-1x6x144x32xf32, 1xi64, 1xi64) + slice_6 = paddle._C_ops.slice( + transpose_2, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_6 = [3] + + # pd_op.slice: (-1x6x144x32xf32) <- (3x-1x6x144x32xf32, 1xi64, 1xi64) + slice_7 = paddle._C_ops.slice( + transpose_2, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_2 + + # pd_op.full: (1xf32) <- () + full_8 = paddle._C_ops.full( + [1], float("0.176777"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (-1x6x144x32xf32) <- (-1x6x144x32xf32, 1xf32) + scale_0 = paddle._C_ops.scale(slice_5, full_8, float("0"), True) + del slice_5 + + # pd_op.transpose: (-1x6x32x144xf32) <- (-1x6x144x32xf32) + transpose_3 = paddle._C_ops.transpose(slice_6, [0, 1, 3, 2]) + del slice_6 + + # pd_op.matmul: (-1x6x144x144xf32) <- (-1x6x144x32xf32, -1x6x32x144xf32) + matmul_1 = paddle._C_ops.matmul(scale_0, transpose_3, False, False) + del scale_0, transpose_3 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_7 = [-1] + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_6 = paddle._C_ops.reshape(data_1, full_int_array_7) + del data_1 + + # pd_op.index_select: (20736x6xf32) <- (529x6xf32, 20736xi64) + index_select_0 = paddle._C_ops.index_select(data_2, reshape_6, 0) + del data_2, reshape_6 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_8 = [144, 144, -1] + + # pd_op.reshape: (144x144x6xf32) <- (20736x6xf32, 3xi64) + reshape_7 = paddle._C_ops.reshape(index_select_0, full_int_array_8) + del index_select_0 + + # pd_op.transpose: (6x144x144xf32) <- (144x144x6xf32) + transpose_4 = paddle._C_ops.transpose(reshape_7, [2, 0, 1]) + del reshape_7 + + # pd_op.unsqueeze: (1x6x144x144xf32) <- (6x144x144xf32, 1xi64) + unsqueeze_0 = paddle._C_ops.unsqueeze(transpose_4, full_int_array_0) + del transpose_4 + + # pd_op.add: (-1x6x144x144xf32) <- (-1x6x144x144xf32, 1x6x144x144xf32) + add_3 = paddle._C_ops.add(matmul_1, unsqueeze_0) + del matmul_1, unsqueeze_0 + + # pd_op.softmax: (-1x6x144x144xf32) <- (-1x6x144x144xf32) + softmax_0 = paddle._C_ops.softmax(add_3, -1) + del add_3 + + # pd_op.matmul: (-1x6x144x32xf32) <- (-1x6x144x144xf32, -1x6x144x32xf32) + matmul_2 = paddle._C_ops.matmul(softmax_0, slice_7, False, False) + del slice_7, softmax_0 + + # pd_op.transpose: (-1x144x6x32xf32) <- (-1x6x144x32xf32) + transpose_5 = paddle._C_ops.transpose(matmul_2, [0, 2, 1, 3]) + del matmul_2 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_3 = [slice_4, full_4, full_1] + del slice_4 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_3 = paddle._C_ops.stack(combine_3, 0) + del combine_3 + + # pd_op.reshape: (-1x144x192xf32) <- (-1x144x6x32xf32, 3xi64) + reshape_8 = paddle._C_ops.reshape(transpose_5, stack_3) + del stack_3, transpose_5 + + # pd_op.matmul: (-1x144x192xf32) <- (-1x144x192xf32, 192x192xf32) + matmul_3 = paddle._C_ops.matmul(reshape_8, parameter_296, False, False) + del parameter_296, reshape_8 + + # pd_op.add: (-1x144x192xf32) <- (-1x144x192xf32, 192xf32) + add_4 = paddle._C_ops.add(matmul_3, parameter_295) + del matmul_3, parameter_295 + + # pd_op.reshape: (-1x12x12x192xf32) <- (-1x144x192xf32, 4xi64) + reshape_9 = paddle._C_ops.reshape(add_4, full_int_array_3) + del add_4 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_9 = [-1, 8, 8, 12, 12, 192] + + # pd_op.reshape: (-1x8x8x12x12x192xf32) <- (-1x12x12x192xf32, 6xi64) + reshape_10 = paddle._C_ops.reshape(reshape_9, full_int_array_9) + del reshape_9 + + # pd_op.transpose: (-1x8x12x8x12x192xf32) <- (-1x8x8x12x12x192xf32) + transpose_6 = paddle._C_ops.transpose(reshape_10, [0, 1, 3, 2, 4, 5]) + del reshape_10 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_10 = [-1, 96, 96, 192] + + # pd_op.reshape: (-1x96x96x192xf32) <- (-1x8x12x8x12x192xf32, 4xi64) + reshape_11 = paddle._C_ops.reshape(transpose_6, full_int_array_10) + del transpose_6 + + # pd_op.full: (xi64) <- () + full_9 = paddle._C_ops.full( + [], float("9216"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_4 = [slice_2, full_9, full_1] + del slice_2 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_4 = paddle._C_ops.stack(combine_4, 0) + del combine_4 + + # pd_op.reshape: (-1x9216x192xf32) <- (-1x96x96x192xf32, 3xi64) + reshape_12 = paddle._C_ops.reshape(reshape_11, stack_4) + del reshape_11, stack_4 + + # pd_op.add: (-1x9216x192xf32) <- (-1x9216x192xf32, -1x9216x192xf32) + add_5 = paddle._C_ops.add(layer_norm_0, reshape_12) + del layer_norm_0, reshape_12 + + # pd_op.layer_norm: (-1x9216x192xf32, -1x9216xf32, -1x9216xf32) <- (-1x9216x192xf32, 192xf32, 192xf32) + layer_norm_6, layer_norm_7, layer_norm_8 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_5, parameter_294, parameter_293, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_293, parameter_294 + + # pd_op.matmul: (-1x9216x768xf32) <- (-1x9216x192xf32, 192x768xf32) + matmul_4 = paddle._C_ops.matmul(layer_norm_6, parameter_292, False, False) + del layer_norm_6, parameter_292 + + # pd_op.add: (-1x9216x768xf32) <- (-1x9216x768xf32, 768xf32) + add_6 = paddle._C_ops.add(matmul_4, parameter_291) + del matmul_4, parameter_291 + + # pd_op.gelu: (-1x9216x768xf32) <- (-1x9216x768xf32) + gelu_0 = paddle._C_ops.gelu(add_6, False) + del add_6 + + # pd_op.matmul: (-1x9216x192xf32) <- (-1x9216x768xf32, 768x192xf32) + matmul_5 = paddle._C_ops.matmul(gelu_0, parameter_290, False, False) + del gelu_0, parameter_290 + + # pd_op.add: (-1x9216x192xf32) <- (-1x9216x192xf32, 192xf32) + add_7 = paddle._C_ops.add(matmul_5, parameter_289) + del matmul_5, parameter_289 + + # pd_op.add: (-1x9216x192xf32) <- (-1x9216x192xf32, -1x9216x192xf32) + add_8 = paddle._C_ops.add(add_5, add_7) + del add_5, add_7 + + # pd_op.shape64: (3xi64) <- (-1x9216x192xf32) + shape64_5 = paddle._C_ops.shape64(add_8) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_8 = paddle._C_ops.slice( + shape64_5, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_5 + + # pd_op.layer_norm: (-1x9216x192xf32, -1x9216xf32, -1x9216xf32) <- (-1x9216x192xf32, 192xf32, 192xf32) + layer_norm_9, layer_norm_10, layer_norm_11 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_8, parameter_288, parameter_287, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_287, parameter_288 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_5 = [slice_8, full_0, full_0, full_1] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_5 = paddle._C_ops.stack(combine_5, 0) + del combine_5 + + # pd_op.reshape: (-1x96x96x192xf32) <- (-1x9216x192xf32, 4xi64) + reshape_13 = paddle._C_ops.reshape(layer_norm_9, stack_5) + del layer_norm_9, stack_5 + + # pd_op.shape64: (4xi64) <- (-1x96x96x192xf32) + shape64_6 = paddle._C_ops.shape64(reshape_13) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_9 = paddle._C_ops.slice( + shape64_6, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_6 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_11 = [-6, -6] + + # pd_op.roll: (-1x96x96x192xf32) <- (-1x96x96x192xf32, 2xi64) + roll_0 = paddle._C_ops.roll(reshape_13, full_int_array_11, [1, 2]) + del reshape_13 + + # pd_op.shape64: (4xi64) <- (-1x96x96x192xf32) + shape64_7 = paddle._C_ops.shape64(roll_0) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_10 = paddle._C_ops.slice( + shape64_7, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_7 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_6 = [slice_10, full_2, full_3, full_2, full_3, full_1] + del full_2, slice_10 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_6 = paddle._C_ops.stack(combine_6, 0) + del combine_6 + + # pd_op.reshape: (-1x8x12x8x12x192xf32) <- (-1x96x96x192xf32, 6xi64) + reshape_14 = paddle._C_ops.reshape(roll_0, stack_6) + del roll_0, stack_6 + + # pd_op.transpose: (-1x8x8x12x12x192xf32) <- (-1x8x12x8x12x192xf32) + transpose_7 = paddle._C_ops.transpose(reshape_14, [0, 1, 3, 2, 4, 5]) + del reshape_14 + + # pd_op.reshape: (-1x12x12x192xf32) <- (-1x8x8x12x12x192xf32, 4xi64) + reshape_15 = paddle._C_ops.reshape(transpose_7, full_int_array_3) + del transpose_7 + + # pd_op.reshape: (-1x144x192xf32) <- (-1x12x12x192xf32, 3xi64) + reshape_16 = paddle._C_ops.reshape(reshape_15, full_int_array_4) + del full_int_array_4, reshape_15 + + # pd_op.full: (1x96x96x1xf32) <- () + full_10 = paddle._C_ops.full( + [1, 96, 96, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_12 = [0, 0] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_13 = [-12, -12] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_14 = [1, 1] + + # pd_op.set_value_: (1x96x96x1xf32) <- (1x96x96x1xf32, 2xi64, 2xi64, 2xi64) + set_value__12 = paddle._C_ops.set_value_( + full_10, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_10 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_15 = [0, -12] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_16 = [-12, -6] + + # pd_op.set_value_: (1x96x96x1xf32) <- (1x96x96x1xf32, 2xi64, 2xi64, 2xi64) + set_value__13 = paddle._C_ops.set_value_( + set_value__12, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__12 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_17 = [0, -6] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_18 = [-12, 2147483647] + + # pd_op.set_value_: (1x96x96x1xf32) <- (1x96x96x1xf32, 2xi64, 2xi64, 2xi64) + set_value__14 = paddle._C_ops.set_value_( + set_value__13, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__13 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_19 = [-12, 0] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_20 = [-6, -12] + + # pd_op.set_value_: (1x96x96x1xf32) <- (1x96x96x1xf32, 2xi64, 2xi64, 2xi64) + set_value__15 = paddle._C_ops.set_value_( + set_value__14, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__14 + + # pd_op.set_value_: (1x96x96x1xf32) <- (1x96x96x1xf32, 2xi64, 2xi64, 2xi64) + set_value__16 = paddle._C_ops.set_value_( + set_value__15, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__15 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_21 = [-6, 2147483647] + + # pd_op.set_value_: (1x96x96x1xf32) <- (1x96x96x1xf32, 2xi64, 2xi64, 2xi64) + set_value__17 = paddle._C_ops.set_value_( + set_value__16, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__16 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_22 = [-6, 0] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_23 = [2147483647, -12] + + # pd_op.set_value_: (1x96x96x1xf32) <- (1x96x96x1xf32, 2xi64, 2xi64, 2xi64) + set_value__18 = paddle._C_ops.set_value_( + set_value__17, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__17 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_24 = [2147483647, -6] + + # pd_op.set_value_: (1x96x96x1xf32) <- (1x96x96x1xf32, 2xi64, 2xi64, 2xi64) + set_value__19 = paddle._C_ops.set_value_( + set_value__18, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__18 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_25 = [2147483647, 2147483647] + + # pd_op.set_value_: (1x96x96x1xf32) <- (1x96x96x1xf32, 2xi64, 2xi64, 2xi64) + set_value__0 = paddle._C_ops.set_value_( + set_value__19, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__19 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_26 = [1, 8, 12, 8, 12, 1] + + # pd_op.reshape: (1x8x12x8x12x1xf32) <- (1x96x96x1xf32, 6xi64) + reshape_17 = paddle._C_ops.reshape(set_value__0, full_int_array_26) + del full_int_array_26 + + # pd_op.transpose: (1x8x8x12x12x1xf32) <- (1x8x12x8x12x1xf32) + transpose_8 = paddle._C_ops.transpose(reshape_17, [0, 1, 3, 2, 4, 5]) + del reshape_17 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_27 = [-1, 12, 12, 1] + + # pd_op.reshape: (64x12x12x1xf32) <- (1x8x8x12x12x1xf32, 4xi64) + reshape_18 = paddle._C_ops.reshape(transpose_8, full_int_array_27) + del transpose_8 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_28 = [-1, 144] + + # pd_op.reshape: (64x144xf32) <- (64x12x12x1xf32, 2xi64) + reshape_19 = paddle._C_ops.reshape(reshape_18, full_int_array_28) + del reshape_18 + + # pd_op.unsqueeze: (64x1x144xf32) <- (64x144xf32, 1xi64) + unsqueeze_1 = paddle._C_ops.unsqueeze(reshape_19, full_int_array_1) + + # pd_op.unsqueeze: (64x144x1xf32) <- (64x144xf32, 1xi64) + unsqueeze_2 = paddle._C_ops.unsqueeze(reshape_19, full_int_array_5) + del reshape_19 + + # pd_op.subtract: (64x144x144xf32) <- (64x1x144xf32, 64x144x1xf32) + subtract_0 = paddle._C_ops.subtract(unsqueeze_1, unsqueeze_2) + del unsqueeze_1, unsqueeze_2 + + # pd_op.full: (xf32) <- () + full_11 = paddle._C_ops.full( + [], float("0"), paddle.float32, paddle.framework._current_expected_place() + ) + + # pd_op.not_equal: (64x144x144xb) <- (64x144x144xf32, xf32) + not_equal_0 = paddle._C_ops.not_equal(subtract_0, full_11) + + # pd_op.full: (64x144x144xf32) <- () + full_12 = paddle._C_ops.full( + [64, 144, 144], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (64x144x144xf32) <- (64x144x144xb, 64x144x144xf32, 64x144x144xf32) + where_0 = paddle._C_ops.where(not_equal_0, full_12, subtract_0) + del full_12, not_equal_0, subtract_0 + + # pd_op.equal: (64x144x144xb) <- (64x144x144xf32, xf32) + equal_0 = paddle._C_ops.equal(where_0, full_11) + + # pd_op.full: (64x144x144xf32) <- () + full_13 = paddle._C_ops.full( + [64, 144, 144], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (64x144x144xf32) <- (64x144x144xb, 64x144x144xf32, 64x144x144xf32) + where_1 = paddle._C_ops.where(equal_0, full_13, where_0) + del equal_0, full_13, where_0 + + # pd_op.shape64: (3xi64) <- (-1x144x192xf32) + shape64_8 = paddle._C_ops.shape64(reshape_16) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_11 = paddle._C_ops.slice( + shape64_8, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_8 + + # pd_op.matmul: (-1x144x576xf32) <- (-1x144x192xf32, 192x576xf32) + matmul_6 = paddle._C_ops.matmul(reshape_16, parameter_286, False, False) + del parameter_286, reshape_16 + + # pd_op.add: (-1x144x576xf32) <- (-1x144x576xf32, 576xf32) + add_9 = paddle._C_ops.add(matmul_6, parameter_285) + del matmul_6, parameter_285 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_7 = [slice_11, full_4, full_5, full_6, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_7 = paddle._C_ops.stack(combine_7, 0) + del combine_7 + + # pd_op.reshape: (-1x144x3x6x32xf32) <- (-1x144x576xf32, 5xi64) + reshape_20 = paddle._C_ops.reshape(add_9, stack_7) + del add_9, stack_7 + + # pd_op.transpose: (3x-1x6x144x32xf32) <- (-1x144x3x6x32xf32) + transpose_9 = paddle._C_ops.transpose(reshape_20, [2, 0, 3, 1, 4]) + del reshape_20 + + # pd_op.slice: (-1x6x144x32xf32) <- (3x-1x6x144x32xf32, 1xi64, 1xi64) + slice_12 = paddle._C_ops.slice( + transpose_9, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x6x144x32xf32) <- (3x-1x6x144x32xf32, 1xi64, 1xi64) + slice_13 = paddle._C_ops.slice( + transpose_9, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x6x144x32xf32) <- (3x-1x6x144x32xf32, 1xi64, 1xi64) + slice_14 = paddle._C_ops.slice( + transpose_9, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_9 + + # pd_op.scale: (-1x6x144x32xf32) <- (-1x6x144x32xf32, 1xf32) + scale_1 = paddle._C_ops.scale(slice_12, full_8, float("0"), True) + del slice_12 + + # pd_op.transpose: (-1x6x32x144xf32) <- (-1x6x144x32xf32) + transpose_10 = paddle._C_ops.transpose(slice_13, [0, 1, 3, 2]) + del slice_13 + + # pd_op.matmul: (-1x6x144x144xf32) <- (-1x6x144x32xf32, -1x6x32x144xf32) + matmul_7 = paddle._C_ops.matmul(scale_1, transpose_10, False, False) + del scale_1, transpose_10 + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_21 = paddle._C_ops.reshape(data_3, full_int_array_7) + del data_3 + + # pd_op.index_select: (20736x6xf32) <- (529x6xf32, 20736xi64) + index_select_1 = paddle._C_ops.index_select(data_4, reshape_21, 0) + del data_4, reshape_21 + + # pd_op.reshape: (144x144x6xf32) <- (20736x6xf32, 3xi64) + reshape_22 = paddle._C_ops.reshape(index_select_1, full_int_array_8) + del index_select_1 + + # pd_op.transpose: (6x144x144xf32) <- (144x144x6xf32) + transpose_11 = paddle._C_ops.transpose(reshape_22, [2, 0, 1]) + del reshape_22 + + # pd_op.unsqueeze: (1x6x144x144xf32) <- (6x144x144xf32, 1xi64) + unsqueeze_3 = paddle._C_ops.unsqueeze(transpose_11, full_int_array_0) + del transpose_11 + + # pd_op.add: (-1x6x144x144xf32) <- (-1x6x144x144xf32, 1x6x144x144xf32) + add_10 = paddle._C_ops.add(matmul_7, unsqueeze_3) + del matmul_7, unsqueeze_3 + + # pd_op.full: (xi64) <- () + full_14 = paddle._C_ops.full( + [], float("64"), paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_0 = paddle._C_ops.floor_divide(slice_11, full_14) + del full_14 + + # pd_op.full: (xi64) <- () + full_15 = paddle._C_ops.full( + [], float("64"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_8 = [floor_divide_0, full_15, full_6, full_4, full_4] + del floor_divide_0, full_15 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_8 = paddle._C_ops.stack(combine_8, 0) + del combine_8 + + # pd_op.reshape: (-1x64x6x144x144xf32) <- (-1x6x144x144xf32, 5xi64) + reshape_23 = paddle._C_ops.reshape(add_10, stack_8) + del add_10, stack_8 + + # pd_op.unsqueeze: (64x1x144x144xf32) <- (64x144x144xf32, 1xi64) + unsqueeze_4 = paddle._C_ops.unsqueeze(where_1, full_int_array_1) + del where_1 + + # pd_op.unsqueeze: (1x64x1x144x144xf32) <- (64x1x144x144xf32, 1xi64) + unsqueeze_5 = paddle._C_ops.unsqueeze(unsqueeze_4, full_int_array_0) + del unsqueeze_4 + + # pd_op.add: (-1x64x6x144x144xf32) <- (-1x64x6x144x144xf32, 1x64x1x144x144xf32) + add_11 = paddle._C_ops.add(reshape_23, unsqueeze_5) + del reshape_23, unsqueeze_5 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_9 = [slice_11, full_6, full_4, full_4] + del full_6 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_9 = paddle._C_ops.stack(combine_9, 0) + del combine_9 + + # pd_op.reshape: (-1x6x144x144xf32) <- (-1x64x6x144x144xf32, 4xi64) + reshape_24 = paddle._C_ops.reshape(add_11, stack_9) + del add_11, stack_9 + + # pd_op.softmax: (-1x6x144x144xf32) <- (-1x6x144x144xf32) + softmax_1 = paddle._C_ops.softmax(reshape_24, -1) + del reshape_24 + + # pd_op.matmul: (-1x6x144x32xf32) <- (-1x6x144x144xf32, -1x6x144x32xf32) + matmul_8 = paddle._C_ops.matmul(softmax_1, slice_14, False, False) + del slice_14, softmax_1 + + # pd_op.transpose: (-1x144x6x32xf32) <- (-1x6x144x32xf32) + transpose_12 = paddle._C_ops.transpose(matmul_8, [0, 2, 1, 3]) + del matmul_8 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_10 = [slice_11, full_4, full_1] + del slice_11 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_10 = paddle._C_ops.stack(combine_10, 0) + del combine_10 + + # pd_op.reshape: (-1x144x192xf32) <- (-1x144x6x32xf32, 3xi64) + reshape_25 = paddle._C_ops.reshape(transpose_12, stack_10) + del stack_10, transpose_12 + + # pd_op.matmul: (-1x144x192xf32) <- (-1x144x192xf32, 192x192xf32) + matmul_9 = paddle._C_ops.matmul(reshape_25, parameter_284, False, False) + del parameter_284, reshape_25 + + # pd_op.add: (-1x144x192xf32) <- (-1x144x192xf32, 192xf32) + add_12 = paddle._C_ops.add(matmul_9, parameter_283) + del matmul_9, parameter_283 + + # pd_op.reshape: (-1x12x12x192xf32) <- (-1x144x192xf32, 4xi64) + reshape_26 = paddle._C_ops.reshape(add_12, full_int_array_3) + del add_12, full_int_array_3 + + # pd_op.reshape: (-1x8x8x12x12x192xf32) <- (-1x12x12x192xf32, 6xi64) + reshape_27 = paddle._C_ops.reshape(reshape_26, full_int_array_9) + del full_int_array_9, reshape_26 + + # pd_op.transpose: (-1x8x12x8x12x192xf32) <- (-1x8x8x12x12x192xf32) + transpose_13 = paddle._C_ops.transpose(reshape_27, [0, 1, 3, 2, 4, 5]) + del reshape_27 + + # pd_op.reshape: (-1x96x96x192xf32) <- (-1x8x12x8x12x192xf32, 4xi64) + reshape_28 = paddle._C_ops.reshape(transpose_13, full_int_array_10) + del full_int_array_10, transpose_13 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_29 = [6, 6] + + # pd_op.roll: (-1x96x96x192xf32) <- (-1x96x96x192xf32, 2xi64) + roll_1 = paddle._C_ops.roll(reshape_28, full_int_array_29, [1, 2]) + del reshape_28 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_11 = [slice_8, full_9, full_1] + del full_9, slice_8 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_11 = paddle._C_ops.stack(combine_11, 0) + del combine_11 + + # pd_op.reshape: (-1x9216x192xf32) <- (-1x96x96x192xf32, 3xi64) + reshape_29 = paddle._C_ops.reshape(roll_1, stack_11) + del roll_1, stack_11 + + # pd_op.add: (-1x9216x192xf32) <- (-1x9216x192xf32, -1x9216x192xf32) + add_13 = paddle._C_ops.add(add_8, reshape_29) + del add_8, reshape_29 + + # pd_op.layer_norm: (-1x9216x192xf32, -1x9216xf32, -1x9216xf32) <- (-1x9216x192xf32, 192xf32, 192xf32) + layer_norm_12, layer_norm_13, layer_norm_14 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_13, parameter_282, parameter_281, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_281, parameter_282 + + # pd_op.matmul: (-1x9216x768xf32) <- (-1x9216x192xf32, 192x768xf32) + matmul_10 = paddle._C_ops.matmul(layer_norm_12, parameter_280, False, False) + del layer_norm_12, parameter_280 + + # pd_op.add: (-1x9216x768xf32) <- (-1x9216x768xf32, 768xf32) + add_14 = paddle._C_ops.add(matmul_10, parameter_279) + del matmul_10, parameter_279 + + # pd_op.gelu: (-1x9216x768xf32) <- (-1x9216x768xf32) + gelu_1 = paddle._C_ops.gelu(add_14, False) + del add_14 + + # pd_op.matmul: (-1x9216x192xf32) <- (-1x9216x768xf32, 768x192xf32) + matmul_11 = paddle._C_ops.matmul(gelu_1, parameter_278, False, False) + del gelu_1, parameter_278 + + # pd_op.add: (-1x9216x192xf32) <- (-1x9216x192xf32, 192xf32) + add_15 = paddle._C_ops.add(matmul_11, parameter_277) + del matmul_11, parameter_277 + + # pd_op.add: (-1x9216x192xf32) <- (-1x9216x192xf32, -1x9216x192xf32) + add_16 = paddle._C_ops.add(add_13, add_15) + del add_13, add_15 + + # pd_op.shape64: (3xi64) <- (-1x9216x192xf32) + shape64_9 = paddle._C_ops.shape64(add_16) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_15 = paddle._C_ops.slice( + shape64_9, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_9 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_12 = [slice_15, full_0, full_0, full_1] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_12 = paddle._C_ops.stack(combine_12, 0) + del combine_12 + + # pd_op.reshape: (-1x96x96x192xf32) <- (-1x9216x192xf32, 4xi64) + reshape_30 = paddle._C_ops.reshape(add_16, stack_12) + del add_16, stack_12 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_30 = [2, 2] + + # pd_op.strided_slice: (-1x48x48x192xf32) <- (-1x96x96x192xf32, 2xi64, 2xi64, 2xi64) + strided_slice_0 = paddle._C_ops.strided_slice( + reshape_30, [1, 2], full_int_array_12, full_int_array_25, full_int_array_30 + ) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_31 = [1, 0] + + # pd_op.strided_slice: (-1x48x48x192xf32) <- (-1x96x96x192xf32, 2xi64, 2xi64, 2xi64) + strided_slice_1 = paddle._C_ops.strided_slice( + reshape_30, [1, 2], full_int_array_31, full_int_array_25, full_int_array_30 + ) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_32 = [0, 1] + + # pd_op.strided_slice: (-1x48x48x192xf32) <- (-1x96x96x192xf32, 2xi64, 2xi64, 2xi64) + strided_slice_2 = paddle._C_ops.strided_slice( + reshape_30, [1, 2], full_int_array_32, full_int_array_25, full_int_array_30 + ) + + # pd_op.strided_slice: (-1x48x48x192xf32) <- (-1x96x96x192xf32, 2xi64, 2xi64, 2xi64) + strided_slice_3 = paddle._C_ops.strided_slice( + reshape_30, [1, 2], full_int_array_14, full_int_array_25, full_int_array_30 + ) + + # pd_op.shape64: (4xi64) <- (-1x96x96x192xf32) + shape64_10 = paddle._C_ops.shape64(reshape_30) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_16 = paddle._C_ops.slice( + shape64_10, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_10 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_13 = [slice_16, full_0, full_0, full_1] + del full_0, full_1, slice_16 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_13 = paddle._C_ops.stack(combine_13, 0) + del combine_13 + + # pd_op.reshape: (-1x96x96x192xf32) <- (-1x96x96x192xf32, 4xi64) + reshape_31 = paddle._C_ops.reshape(reshape_30, stack_13) + del reshape_30, stack_13 + + # pd_op.full: (1xi32) <- () + full_16 = paddle._C_ops.full( + [1], float("-1"), paddle.int32, paddle.core.CPUPlace() + ) + + # builtin.combine: ([-1x48x48x192xf32, -1x48x48x192xf32, -1x48x48x192xf32, -1x48x48x192xf32]) <- (-1x48x48x192xf32, -1x48x48x192xf32, -1x48x48x192xf32, -1x48x48x192xf32) + combine_14 = [ + strided_slice_0, + strided_slice_1, + strided_slice_2, + strided_slice_3, + ] + del strided_slice_0, strided_slice_1, strided_slice_2, strided_slice_3 + + # pd_op.concat: (-1x48x48x768xf32) <- ([-1x48x48x192xf32, -1x48x48x192xf32, -1x48x48x192xf32, -1x48x48x192xf32], 1xi32) + concat_0 = paddle._C_ops.concat(combine_14, full_16) + del combine_14 + + # pd_op.full: (xi64) <- () + full_17 = paddle._C_ops.full( + [], float("-1"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_18 = paddle._C_ops.full( + [], float("768"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_15 = [slice_15, full_17, full_18] + del slice_15 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_14 = paddle._C_ops.stack(combine_15, 0) + del combine_15 + + # pd_op.reshape: (-1x-1x768xf32) <- (-1x48x48x768xf32, 3xi64) + reshape_32 = paddle._C_ops.reshape(concat_0, stack_14) + del concat_0, stack_14 + + # pd_op.layer_norm: (-1x-1x768xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x768xf32, 768xf32, 768xf32) + layer_norm_15, layer_norm_16, layer_norm_17 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + reshape_32, parameter_276, parameter_275, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_275, parameter_276, reshape_32 + + # pd_op.matmul: (-1x-1x384xf32) <- (-1x-1x768xf32, 768x384xf32) + matmul_12 = paddle._C_ops.matmul(layer_norm_15, parameter_274, False, False) + del layer_norm_15, parameter_274 + + # pd_op.shape64: (3xi64) <- (-1x-1x384xf32) + shape64_11 = paddle._C_ops.shape64(matmul_12) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_17 = paddle._C_ops.slice( + shape64_11, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_11 + + # pd_op.shape64: (3xi64) <- (-1x-1x384xf32) + shape64_12 = paddle._C_ops.shape64(matmul_12) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_18 = paddle._C_ops.slice( + shape64_12, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + del shape64_12 + + # pd_op.layer_norm: (-1x-1x384xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x384xf32, 384xf32, 384xf32) + layer_norm_18, layer_norm_19, layer_norm_20 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + matmul_12, parameter_273, parameter_272, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_272, parameter_273 + + # pd_op.full: (xi64) <- () + full_19 = paddle._C_ops.full( + [], float("48"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_20 = paddle._C_ops.full( + [], float("384"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_16 = [slice_17, full_19, full_19, full_20] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_15 = paddle._C_ops.stack(combine_16, 0) + del combine_16 + + # pd_op.reshape: (-1x48x48x384xf32) <- (-1x-1x384xf32, 4xi64) + reshape_33 = paddle._C_ops.reshape(layer_norm_18, stack_15) + del layer_norm_18, stack_15 + + # pd_op.shape64: (4xi64) <- (-1x48x48x384xf32) + shape64_13 = paddle._C_ops.shape64(reshape_33) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_19 = paddle._C_ops.slice( + shape64_13, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_13 + + # pd_op.full: (xi64) <- () + full_21 = paddle._C_ops.full( + [], float("4"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_17 = [slice_19, full_21, full_3, full_21, full_3, full_20] + del slice_19 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_16 = paddle._C_ops.stack(combine_17, 0) + del combine_17 + + # pd_op.reshape: (-1x4x12x4x12x384xf32) <- (-1x48x48x384xf32, 6xi64) + reshape_34 = paddle._C_ops.reshape(reshape_33, stack_16) + del reshape_33, stack_16 + + # pd_op.transpose: (-1x4x4x12x12x384xf32) <- (-1x4x12x4x12x384xf32) + transpose_14 = paddle._C_ops.transpose(reshape_34, [0, 1, 3, 2, 4, 5]) + del reshape_34 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_33 = [-1, 12, 12, 384] + + # pd_op.reshape: (-1x12x12x384xf32) <- (-1x4x4x12x12x384xf32, 4xi64) + reshape_35 = paddle._C_ops.reshape(transpose_14, full_int_array_33) + del transpose_14 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_34 = [-1, 144, 384] + + # pd_op.reshape: (-1x144x384xf32) <- (-1x12x12x384xf32, 3xi64) + reshape_36 = paddle._C_ops.reshape(reshape_35, full_int_array_34) + del reshape_35 + + # pd_op.shape64: (3xi64) <- (-1x144x384xf32) + shape64_14 = paddle._C_ops.shape64(reshape_36) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_20 = paddle._C_ops.slice( + shape64_14, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_14 + + # pd_op.matmul: (-1x144x1152xf32) <- (-1x144x384xf32, 384x1152xf32) + matmul_13 = paddle._C_ops.matmul(reshape_36, parameter_271, False, False) + del parameter_271, reshape_36 + + # pd_op.add: (-1x144x1152xf32) <- (-1x144x1152xf32, 1152xf32) + add_17 = paddle._C_ops.add(matmul_13, parameter_270) + del matmul_13, parameter_270 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_18 = [slice_20, full_4, full_5, full_3, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_17 = paddle._C_ops.stack(combine_18, 0) + del combine_18 + + # pd_op.reshape: (-1x144x3x12x32xf32) <- (-1x144x1152xf32, 5xi64) + reshape_37 = paddle._C_ops.reshape(add_17, stack_17) + del add_17, stack_17 + + # pd_op.transpose: (3x-1x12x144x32xf32) <- (-1x144x3x12x32xf32) + transpose_15 = paddle._C_ops.transpose(reshape_37, [2, 0, 3, 1, 4]) + del reshape_37 + + # pd_op.slice: (-1x12x144x32xf32) <- (3x-1x12x144x32xf32, 1xi64, 1xi64) + slice_21 = paddle._C_ops.slice( + transpose_15, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x12x144x32xf32) <- (3x-1x12x144x32xf32, 1xi64, 1xi64) + slice_22 = paddle._C_ops.slice( + transpose_15, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x12x144x32xf32) <- (3x-1x12x144x32xf32, 1xi64, 1xi64) + slice_23 = paddle._C_ops.slice( + transpose_15, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_15 + + # pd_op.scale: (-1x12x144x32xf32) <- (-1x12x144x32xf32, 1xf32) + scale_2 = paddle._C_ops.scale(slice_21, full_8, float("0"), True) + del slice_21 + + # pd_op.transpose: (-1x12x32x144xf32) <- (-1x12x144x32xf32) + transpose_16 = paddle._C_ops.transpose(slice_22, [0, 1, 3, 2]) + del slice_22 + + # pd_op.matmul: (-1x12x144x144xf32) <- (-1x12x144x32xf32, -1x12x32x144xf32) + matmul_14 = paddle._C_ops.matmul(scale_2, transpose_16, False, False) + del scale_2, transpose_16 + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_38 = paddle._C_ops.reshape(data_5, full_int_array_7) + del data_5 + + # pd_op.index_select: (20736x12xf32) <- (529x12xf32, 20736xi64) + index_select_2 = paddle._C_ops.index_select(data_6, reshape_38, 0) + del data_6, reshape_38 + + # pd_op.reshape: (144x144x12xf32) <- (20736x12xf32, 3xi64) + reshape_39 = paddle._C_ops.reshape(index_select_2, full_int_array_8) + del index_select_2 + + # pd_op.transpose: (12x144x144xf32) <- (144x144x12xf32) + transpose_17 = paddle._C_ops.transpose(reshape_39, [2, 0, 1]) + del reshape_39 + + # pd_op.unsqueeze: (1x12x144x144xf32) <- (12x144x144xf32, 1xi64) + unsqueeze_6 = paddle._C_ops.unsqueeze(transpose_17, full_int_array_0) + del transpose_17 + + # pd_op.add: (-1x12x144x144xf32) <- (-1x12x144x144xf32, 1x12x144x144xf32) + add_18 = paddle._C_ops.add(matmul_14, unsqueeze_6) + del matmul_14, unsqueeze_6 + + # pd_op.softmax: (-1x12x144x144xf32) <- (-1x12x144x144xf32) + softmax_2 = paddle._C_ops.softmax(add_18, -1) + del add_18 + + # pd_op.matmul: (-1x12x144x32xf32) <- (-1x12x144x144xf32, -1x12x144x32xf32) + matmul_15 = paddle._C_ops.matmul(softmax_2, slice_23, False, False) + del slice_23, softmax_2 + + # pd_op.transpose: (-1x144x12x32xf32) <- (-1x12x144x32xf32) + transpose_18 = paddle._C_ops.transpose(matmul_15, [0, 2, 1, 3]) + del matmul_15 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_19 = [slice_20, full_4, full_20] + del slice_20 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_18 = paddle._C_ops.stack(combine_19, 0) + del combine_19 + + # pd_op.reshape: (-1x144x384xf32) <- (-1x144x12x32xf32, 3xi64) + reshape_40 = paddle._C_ops.reshape(transpose_18, stack_18) + del stack_18, transpose_18 + + # pd_op.matmul: (-1x144x384xf32) <- (-1x144x384xf32, 384x384xf32) + matmul_16 = paddle._C_ops.matmul(reshape_40, parameter_269, False, False) + del parameter_269, reshape_40 + + # pd_op.add: (-1x144x384xf32) <- (-1x144x384xf32, 384xf32) + add_19 = paddle._C_ops.add(matmul_16, parameter_268) + del matmul_16, parameter_268 + + # pd_op.reshape: (-1x12x12x384xf32) <- (-1x144x384xf32, 4xi64) + reshape_41 = paddle._C_ops.reshape(add_19, full_int_array_33) + del add_19 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_35 = [-1, 4, 4, 12, 12, 384] + + # pd_op.reshape: (-1x4x4x12x12x384xf32) <- (-1x12x12x384xf32, 6xi64) + reshape_42 = paddle._C_ops.reshape(reshape_41, full_int_array_35) + del reshape_41 + + # pd_op.transpose: (-1x4x12x4x12x384xf32) <- (-1x4x4x12x12x384xf32) + transpose_19 = paddle._C_ops.transpose(reshape_42, [0, 1, 3, 2, 4, 5]) + del reshape_42 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_36 = [-1, 48, 48, 384] + + # pd_op.reshape: (-1x48x48x384xf32) <- (-1x4x12x4x12x384xf32, 4xi64) + reshape_43 = paddle._C_ops.reshape(transpose_19, full_int_array_36) + del transpose_19 + + # pd_op.full: (xi64) <- () + full_22 = paddle._C_ops.full( + [], float("2304"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_20 = [slice_17, full_22, full_20] + del slice_17 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_19 = paddle._C_ops.stack(combine_20, 0) + del combine_20 + + # pd_op.reshape: (-1x2304x384xf32) <- (-1x48x48x384xf32, 3xi64) + reshape_44 = paddle._C_ops.reshape(reshape_43, stack_19) + del reshape_43, stack_19 + + # pd_op.add: (-1x2304x384xf32) <- (-1x-1x384xf32, -1x2304x384xf32) + add_20 = paddle._C_ops.add(matmul_12, reshape_44) + del matmul_12, reshape_44 + + # pd_op.layer_norm: (-1x2304x384xf32, -1x2304xf32, -1x2304xf32) <- (-1x2304x384xf32, 384xf32, 384xf32) + layer_norm_21, layer_norm_22, layer_norm_23 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_20, parameter_267, parameter_266, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_266, parameter_267 + + # pd_op.matmul: (-1x2304x1536xf32) <- (-1x2304x384xf32, 384x1536xf32) + matmul_17 = paddle._C_ops.matmul(layer_norm_21, parameter_265, False, False) + del layer_norm_21, parameter_265 + + # pd_op.add: (-1x2304x1536xf32) <- (-1x2304x1536xf32, 1536xf32) + add_21 = paddle._C_ops.add(matmul_17, parameter_264) + del matmul_17, parameter_264 + + # pd_op.gelu: (-1x2304x1536xf32) <- (-1x2304x1536xf32) + gelu_2 = paddle._C_ops.gelu(add_21, False) + del add_21 + + # pd_op.matmul: (-1x2304x384xf32) <- (-1x2304x1536xf32, 1536x384xf32) + matmul_18 = paddle._C_ops.matmul(gelu_2, parameter_263, False, False) + del gelu_2, parameter_263 + + # pd_op.add: (-1x2304x384xf32) <- (-1x2304x384xf32, 384xf32) + add_22 = paddle._C_ops.add(matmul_18, parameter_262) + del matmul_18, parameter_262 + + # pd_op.add: (-1x2304x384xf32) <- (-1x2304x384xf32, -1x2304x384xf32) + add_23 = paddle._C_ops.add(add_20, add_22) + del add_20, add_22 + + # pd_op.shape64: (3xi64) <- (-1x2304x384xf32) + shape64_15 = paddle._C_ops.shape64(add_23) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_24 = paddle._C_ops.slice( + shape64_15, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_15 + + # pd_op.layer_norm: (-1x2304x384xf32, -1x2304xf32, -1x2304xf32) <- (-1x2304x384xf32, 384xf32, 384xf32) + layer_norm_24, layer_norm_25, layer_norm_26 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_23, parameter_261, parameter_260, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_260, parameter_261 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_21 = [slice_24, full_19, full_19, full_20] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_20 = paddle._C_ops.stack(combine_21, 0) + del combine_21 + + # pd_op.reshape: (-1x48x48x384xf32) <- (-1x2304x384xf32, 4xi64) + reshape_45 = paddle._C_ops.reshape(layer_norm_24, stack_20) + del layer_norm_24, stack_20 + + # pd_op.shape64: (4xi64) <- (-1x48x48x384xf32) + shape64_16 = paddle._C_ops.shape64(reshape_45) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_25 = paddle._C_ops.slice( + shape64_16, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_16 + + # pd_op.roll: (-1x48x48x384xf32) <- (-1x48x48x384xf32, 2xi64) + roll_2 = paddle._C_ops.roll(reshape_45, full_int_array_11, [1, 2]) + del reshape_45 + + # pd_op.shape64: (4xi64) <- (-1x48x48x384xf32) + shape64_17 = paddle._C_ops.shape64(roll_2) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_26 = paddle._C_ops.slice( + shape64_17, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_17 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_22 = [slice_26, full_21, full_3, full_21, full_3, full_20] + del slice_26 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_21 = paddle._C_ops.stack(combine_22, 0) + del combine_22 + + # pd_op.reshape: (-1x4x12x4x12x384xf32) <- (-1x48x48x384xf32, 6xi64) + reshape_46 = paddle._C_ops.reshape(roll_2, stack_21) + del roll_2, stack_21 + + # pd_op.transpose: (-1x4x4x12x12x384xf32) <- (-1x4x12x4x12x384xf32) + transpose_20 = paddle._C_ops.transpose(reshape_46, [0, 1, 3, 2, 4, 5]) + del reshape_46 + + # pd_op.reshape: (-1x12x12x384xf32) <- (-1x4x4x12x12x384xf32, 4xi64) + reshape_47 = paddle._C_ops.reshape(transpose_20, full_int_array_33) + del transpose_20 + + # pd_op.reshape: (-1x144x384xf32) <- (-1x12x12x384xf32, 3xi64) + reshape_48 = paddle._C_ops.reshape(reshape_47, full_int_array_34) + del full_int_array_34, reshape_47 + + # pd_op.full: (1x48x48x1xf32) <- () + full_23 = paddle._C_ops.full( + [1, 48, 48, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x48x48x1xf32) <- (1x48x48x1xf32, 2xi64, 2xi64, 2xi64) + set_value__20 = paddle._C_ops.set_value_( + full_23, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_23 + + # pd_op.set_value_: (1x48x48x1xf32) <- (1x48x48x1xf32, 2xi64, 2xi64, 2xi64) + set_value__21 = paddle._C_ops.set_value_( + set_value__20, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__20 + + # pd_op.set_value_: (1x48x48x1xf32) <- (1x48x48x1xf32, 2xi64, 2xi64, 2xi64) + set_value__22 = paddle._C_ops.set_value_( + set_value__21, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__21 + + # pd_op.set_value_: (1x48x48x1xf32) <- (1x48x48x1xf32, 2xi64, 2xi64, 2xi64) + set_value__23 = paddle._C_ops.set_value_( + set_value__22, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__22 + + # pd_op.set_value_: (1x48x48x1xf32) <- (1x48x48x1xf32, 2xi64, 2xi64, 2xi64) + set_value__24 = paddle._C_ops.set_value_( + set_value__23, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__23 + + # pd_op.set_value_: (1x48x48x1xf32) <- (1x48x48x1xf32, 2xi64, 2xi64, 2xi64) + set_value__25 = paddle._C_ops.set_value_( + set_value__24, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__24 + + # pd_op.set_value_: (1x48x48x1xf32) <- (1x48x48x1xf32, 2xi64, 2xi64, 2xi64) + set_value__26 = paddle._C_ops.set_value_( + set_value__25, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__25 + + # pd_op.set_value_: (1x48x48x1xf32) <- (1x48x48x1xf32, 2xi64, 2xi64, 2xi64) + set_value__27 = paddle._C_ops.set_value_( + set_value__26, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__26 + + # pd_op.set_value_: (1x48x48x1xf32) <- (1x48x48x1xf32, 2xi64, 2xi64, 2xi64) + set_value__1 = paddle._C_ops.set_value_( + set_value__27, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__27 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_37 = [1, 4, 12, 4, 12, 1] + + # pd_op.reshape: (1x4x12x4x12x1xf32) <- (1x48x48x1xf32, 6xi64) + reshape_49 = paddle._C_ops.reshape(set_value__1, full_int_array_37) + del full_int_array_37 + + # pd_op.transpose: (1x4x4x12x12x1xf32) <- (1x4x12x4x12x1xf32) + transpose_21 = paddle._C_ops.transpose(reshape_49, [0, 1, 3, 2, 4, 5]) + del reshape_49 + + # pd_op.reshape: (16x12x12x1xf32) <- (1x4x4x12x12x1xf32, 4xi64) + reshape_50 = paddle._C_ops.reshape(transpose_21, full_int_array_27) + del transpose_21 + + # pd_op.reshape: (16x144xf32) <- (16x12x12x1xf32, 2xi64) + reshape_51 = paddle._C_ops.reshape(reshape_50, full_int_array_28) + del reshape_50 + + # pd_op.unsqueeze: (16x1x144xf32) <- (16x144xf32, 1xi64) + unsqueeze_7 = paddle._C_ops.unsqueeze(reshape_51, full_int_array_1) + + # pd_op.unsqueeze: (16x144x1xf32) <- (16x144xf32, 1xi64) + unsqueeze_8 = paddle._C_ops.unsqueeze(reshape_51, full_int_array_5) + del reshape_51 + + # pd_op.subtract: (16x144x144xf32) <- (16x1x144xf32, 16x144x1xf32) + subtract_1 = paddle._C_ops.subtract(unsqueeze_7, unsqueeze_8) + del unsqueeze_7, unsqueeze_8 + + # pd_op.not_equal: (16x144x144xb) <- (16x144x144xf32, xf32) + not_equal_1 = paddle._C_ops.not_equal(subtract_1, full_11) + + # pd_op.full: (16x144x144xf32) <- () + full_24 = paddle._C_ops.full( + [16, 144, 144], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (16x144x144xf32) <- (16x144x144xb, 16x144x144xf32, 16x144x144xf32) + where_2 = paddle._C_ops.where(not_equal_1, full_24, subtract_1) + del full_24, not_equal_1, subtract_1 + + # pd_op.equal: (16x144x144xb) <- (16x144x144xf32, xf32) + equal_1 = paddle._C_ops.equal(where_2, full_11) + + # pd_op.full: (16x144x144xf32) <- () + full_25 = paddle._C_ops.full( + [16, 144, 144], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (16x144x144xf32) <- (16x144x144xb, 16x144x144xf32, 16x144x144xf32) + where_3 = paddle._C_ops.where(equal_1, full_25, where_2) + del equal_1, full_25, where_2 + + # pd_op.shape64: (3xi64) <- (-1x144x384xf32) + shape64_18 = paddle._C_ops.shape64(reshape_48) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_27 = paddle._C_ops.slice( + shape64_18, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_18 + + # pd_op.matmul: (-1x144x1152xf32) <- (-1x144x384xf32, 384x1152xf32) + matmul_19 = paddle._C_ops.matmul(reshape_48, parameter_259, False, False) + del parameter_259, reshape_48 + + # pd_op.add: (-1x144x1152xf32) <- (-1x144x1152xf32, 1152xf32) + add_24 = paddle._C_ops.add(matmul_19, parameter_258) + del matmul_19, parameter_258 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_23 = [slice_27, full_4, full_5, full_3, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_22 = paddle._C_ops.stack(combine_23, 0) + del combine_23 + + # pd_op.reshape: (-1x144x3x12x32xf32) <- (-1x144x1152xf32, 5xi64) + reshape_52 = paddle._C_ops.reshape(add_24, stack_22) + del add_24, stack_22 + + # pd_op.transpose: (3x-1x12x144x32xf32) <- (-1x144x3x12x32xf32) + transpose_22 = paddle._C_ops.transpose(reshape_52, [2, 0, 3, 1, 4]) + del reshape_52 + + # pd_op.slice: (-1x12x144x32xf32) <- (3x-1x12x144x32xf32, 1xi64, 1xi64) + slice_28 = paddle._C_ops.slice( + transpose_22, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x12x144x32xf32) <- (3x-1x12x144x32xf32, 1xi64, 1xi64) + slice_29 = paddle._C_ops.slice( + transpose_22, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x12x144x32xf32) <- (3x-1x12x144x32xf32, 1xi64, 1xi64) + slice_30 = paddle._C_ops.slice( + transpose_22, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_22 + + # pd_op.scale: (-1x12x144x32xf32) <- (-1x12x144x32xf32, 1xf32) + scale_3 = paddle._C_ops.scale(slice_28, full_8, float("0"), True) + del slice_28 + + # pd_op.transpose: (-1x12x32x144xf32) <- (-1x12x144x32xf32) + transpose_23 = paddle._C_ops.transpose(slice_29, [0, 1, 3, 2]) + del slice_29 + + # pd_op.matmul: (-1x12x144x144xf32) <- (-1x12x144x32xf32, -1x12x32x144xf32) + matmul_20 = paddle._C_ops.matmul(scale_3, transpose_23, False, False) + del scale_3, transpose_23 + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_53 = paddle._C_ops.reshape(data_7, full_int_array_7) + del data_7 + + # pd_op.index_select: (20736x12xf32) <- (529x12xf32, 20736xi64) + index_select_3 = paddle._C_ops.index_select(data_8, reshape_53, 0) + del data_8, reshape_53 + + # pd_op.reshape: (144x144x12xf32) <- (20736x12xf32, 3xi64) + reshape_54 = paddle._C_ops.reshape(index_select_3, full_int_array_8) + del index_select_3 + + # pd_op.transpose: (12x144x144xf32) <- (144x144x12xf32) + transpose_24 = paddle._C_ops.transpose(reshape_54, [2, 0, 1]) + del reshape_54 + + # pd_op.unsqueeze: (1x12x144x144xf32) <- (12x144x144xf32, 1xi64) + unsqueeze_9 = paddle._C_ops.unsqueeze(transpose_24, full_int_array_0) + del transpose_24 + + # pd_op.add: (-1x12x144x144xf32) <- (-1x12x144x144xf32, 1x12x144x144xf32) + add_25 = paddle._C_ops.add(matmul_20, unsqueeze_9) + del matmul_20, unsqueeze_9 + + # pd_op.full: (xi64) <- () + full_26 = paddle._C_ops.full( + [], float("16"), paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_1 = paddle._C_ops.floor_divide(slice_27, full_26) + del full_26 + + # pd_op.full: (xi64) <- () + full_27 = paddle._C_ops.full( + [], float("16"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_24 = [floor_divide_1, full_27, full_3, full_4, full_4] + del floor_divide_1, full_27 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_23 = paddle._C_ops.stack(combine_24, 0) + del combine_24 + + # pd_op.reshape: (-1x16x12x144x144xf32) <- (-1x12x144x144xf32, 5xi64) + reshape_55 = paddle._C_ops.reshape(add_25, stack_23) + del add_25, stack_23 + + # pd_op.unsqueeze: (16x1x144x144xf32) <- (16x144x144xf32, 1xi64) + unsqueeze_10 = paddle._C_ops.unsqueeze(where_3, full_int_array_1) + del where_3 + + # pd_op.unsqueeze: (1x16x1x144x144xf32) <- (16x1x144x144xf32, 1xi64) + unsqueeze_11 = paddle._C_ops.unsqueeze(unsqueeze_10, full_int_array_0) + del unsqueeze_10 + + # pd_op.add: (-1x16x12x144x144xf32) <- (-1x16x12x144x144xf32, 1x16x1x144x144xf32) + add_26 = paddle._C_ops.add(reshape_55, unsqueeze_11) + del reshape_55, unsqueeze_11 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_25 = [slice_27, full_3, full_4, full_4] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_24 = paddle._C_ops.stack(combine_25, 0) + del combine_25 + + # pd_op.reshape: (-1x12x144x144xf32) <- (-1x16x12x144x144xf32, 4xi64) + reshape_56 = paddle._C_ops.reshape(add_26, stack_24) + del add_26, stack_24 + + # pd_op.softmax: (-1x12x144x144xf32) <- (-1x12x144x144xf32) + softmax_3 = paddle._C_ops.softmax(reshape_56, -1) + del reshape_56 + + # pd_op.matmul: (-1x12x144x32xf32) <- (-1x12x144x144xf32, -1x12x144x32xf32) + matmul_21 = paddle._C_ops.matmul(softmax_3, slice_30, False, False) + del slice_30, softmax_3 + + # pd_op.transpose: (-1x144x12x32xf32) <- (-1x12x144x32xf32) + transpose_25 = paddle._C_ops.transpose(matmul_21, [0, 2, 1, 3]) + del matmul_21 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_26 = [slice_27, full_4, full_20] + del slice_27 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_25 = paddle._C_ops.stack(combine_26, 0) + del combine_26 + + # pd_op.reshape: (-1x144x384xf32) <- (-1x144x12x32xf32, 3xi64) + reshape_57 = paddle._C_ops.reshape(transpose_25, stack_25) + del stack_25, transpose_25 + + # pd_op.matmul: (-1x144x384xf32) <- (-1x144x384xf32, 384x384xf32) + matmul_22 = paddle._C_ops.matmul(reshape_57, parameter_257, False, False) + del parameter_257, reshape_57 + + # pd_op.add: (-1x144x384xf32) <- (-1x144x384xf32, 384xf32) + add_27 = paddle._C_ops.add(matmul_22, parameter_256) + del matmul_22, parameter_256 + + # pd_op.reshape: (-1x12x12x384xf32) <- (-1x144x384xf32, 4xi64) + reshape_58 = paddle._C_ops.reshape(add_27, full_int_array_33) + del add_27, full_int_array_33 + + # pd_op.reshape: (-1x4x4x12x12x384xf32) <- (-1x12x12x384xf32, 6xi64) + reshape_59 = paddle._C_ops.reshape(reshape_58, full_int_array_35) + del full_int_array_35, reshape_58 + + # pd_op.transpose: (-1x4x12x4x12x384xf32) <- (-1x4x4x12x12x384xf32) + transpose_26 = paddle._C_ops.transpose(reshape_59, [0, 1, 3, 2, 4, 5]) + del reshape_59 + + # pd_op.reshape: (-1x48x48x384xf32) <- (-1x4x12x4x12x384xf32, 4xi64) + reshape_60 = paddle._C_ops.reshape(transpose_26, full_int_array_36) + del full_int_array_36, transpose_26 + + # pd_op.roll: (-1x48x48x384xf32) <- (-1x48x48x384xf32, 2xi64) + roll_3 = paddle._C_ops.roll(reshape_60, full_int_array_29, [1, 2]) + del reshape_60 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_27 = [slice_24, full_22, full_20] + del full_22, slice_24 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_26 = paddle._C_ops.stack(combine_27, 0) + del combine_27 + + # pd_op.reshape: (-1x2304x384xf32) <- (-1x48x48x384xf32, 3xi64) + reshape_61 = paddle._C_ops.reshape(roll_3, stack_26) + del roll_3, stack_26 + + # pd_op.add: (-1x2304x384xf32) <- (-1x2304x384xf32, -1x2304x384xf32) + add_28 = paddle._C_ops.add(add_23, reshape_61) + del add_23, reshape_61 + + # pd_op.layer_norm: (-1x2304x384xf32, -1x2304xf32, -1x2304xf32) <- (-1x2304x384xf32, 384xf32, 384xf32) + layer_norm_27, layer_norm_28, layer_norm_29 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_28, parameter_255, parameter_254, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_254, parameter_255 + + # pd_op.matmul: (-1x2304x1536xf32) <- (-1x2304x384xf32, 384x1536xf32) + matmul_23 = paddle._C_ops.matmul(layer_norm_27, parameter_253, False, False) + del layer_norm_27, parameter_253 + + # pd_op.add: (-1x2304x1536xf32) <- (-1x2304x1536xf32, 1536xf32) + add_29 = paddle._C_ops.add(matmul_23, parameter_252) + del matmul_23, parameter_252 + + # pd_op.gelu: (-1x2304x1536xf32) <- (-1x2304x1536xf32) + gelu_3 = paddle._C_ops.gelu(add_29, False) + del add_29 + + # pd_op.matmul: (-1x2304x384xf32) <- (-1x2304x1536xf32, 1536x384xf32) + matmul_24 = paddle._C_ops.matmul(gelu_3, parameter_251, False, False) + del gelu_3, parameter_251 + + # pd_op.add: (-1x2304x384xf32) <- (-1x2304x384xf32, 384xf32) + add_30 = paddle._C_ops.add(matmul_24, parameter_250) + del matmul_24, parameter_250 + + # pd_op.add: (-1x2304x384xf32) <- (-1x2304x384xf32, -1x2304x384xf32) + add_31 = paddle._C_ops.add(add_28, add_30) + del add_28, add_30 + + # pd_op.shape64: (3xi64) <- (-1x2304x384xf32) + shape64_19 = paddle._C_ops.shape64(add_31) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_31 = paddle._C_ops.slice( + shape64_19, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_19 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_28 = [slice_31, full_19, full_19, full_20] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_27 = paddle._C_ops.stack(combine_28, 0) + del combine_28 + + # pd_op.reshape: (-1x48x48x384xf32) <- (-1x2304x384xf32, 4xi64) + reshape_62 = paddle._C_ops.reshape(add_31, stack_27) + del add_31, stack_27 + + # pd_op.strided_slice: (-1x24x24x384xf32) <- (-1x48x48x384xf32, 2xi64, 2xi64, 2xi64) + strided_slice_4 = paddle._C_ops.strided_slice( + reshape_62, [1, 2], full_int_array_12, full_int_array_25, full_int_array_30 + ) + + # pd_op.strided_slice: (-1x24x24x384xf32) <- (-1x48x48x384xf32, 2xi64, 2xi64, 2xi64) + strided_slice_5 = paddle._C_ops.strided_slice( + reshape_62, [1, 2], full_int_array_31, full_int_array_25, full_int_array_30 + ) + + # pd_op.strided_slice: (-1x24x24x384xf32) <- (-1x48x48x384xf32, 2xi64, 2xi64, 2xi64) + strided_slice_6 = paddle._C_ops.strided_slice( + reshape_62, [1, 2], full_int_array_32, full_int_array_25, full_int_array_30 + ) + + # pd_op.strided_slice: (-1x24x24x384xf32) <- (-1x48x48x384xf32, 2xi64, 2xi64, 2xi64) + strided_slice_7 = paddle._C_ops.strided_slice( + reshape_62, [1, 2], full_int_array_14, full_int_array_25, full_int_array_30 + ) + + # pd_op.shape64: (4xi64) <- (-1x48x48x384xf32) + shape64_20 = paddle._C_ops.shape64(reshape_62) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_32 = paddle._C_ops.slice( + shape64_20, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_20 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_29 = [slice_32, full_19, full_19, full_20] + del full_20, slice_32 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_28 = paddle._C_ops.stack(combine_29, 0) + del combine_29 + + # pd_op.reshape: (-1x48x48x384xf32) <- (-1x48x48x384xf32, 4xi64) + reshape_63 = paddle._C_ops.reshape(reshape_62, stack_28) + del reshape_62, stack_28 + + # builtin.combine: ([-1x24x24x384xf32, -1x24x24x384xf32, -1x24x24x384xf32, -1x24x24x384xf32]) <- (-1x24x24x384xf32, -1x24x24x384xf32, -1x24x24x384xf32, -1x24x24x384xf32) + combine_30 = [ + strided_slice_4, + strided_slice_5, + strided_slice_6, + strided_slice_7, + ] + del strided_slice_4, strided_slice_5, strided_slice_6, strided_slice_7 + + # pd_op.concat: (-1x24x24x1536xf32) <- ([-1x24x24x384xf32, -1x24x24x384xf32, -1x24x24x384xf32, -1x24x24x384xf32], 1xi32) + concat_1 = paddle._C_ops.concat(combine_30, full_16) + del combine_30 + + # pd_op.full: (xi64) <- () + full_28 = paddle._C_ops.full( + [], float("1536"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_31 = [slice_31, full_17, full_28] + del slice_31 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_29 = paddle._C_ops.stack(combine_31, 0) + del combine_31 + + # pd_op.reshape: (-1x-1x1536xf32) <- (-1x24x24x1536xf32, 3xi64) + reshape_64 = paddle._C_ops.reshape(concat_1, stack_29) + del concat_1, stack_29 + + # pd_op.layer_norm: (-1x-1x1536xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x1536xf32, 1536xf32, 1536xf32) + layer_norm_30, layer_norm_31, layer_norm_32 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + reshape_64, parameter_249, parameter_248, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_248, parameter_249, reshape_64 + + # pd_op.matmul: (-1x-1x768xf32) <- (-1x-1x1536xf32, 1536x768xf32) + matmul_25 = paddle._C_ops.matmul(layer_norm_30, parameter_247, False, False) + del layer_norm_30, parameter_247 + + # pd_op.shape64: (3xi64) <- (-1x-1x768xf32) + shape64_21 = paddle._C_ops.shape64(matmul_25) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_33 = paddle._C_ops.slice( + shape64_21, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_21 + + # pd_op.shape64: (3xi64) <- (-1x-1x768xf32) + shape64_22 = paddle._C_ops.shape64(matmul_25) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_34 = paddle._C_ops.slice( + shape64_22, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + del shape64_22 + + # pd_op.layer_norm: (-1x-1x768xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x768xf32, 768xf32, 768xf32) + layer_norm_33, layer_norm_34, layer_norm_35 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + matmul_25, parameter_246, parameter_245, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_245, parameter_246 + + # pd_op.full: (xi64) <- () + full_29 = paddle._C_ops.full( + [], float("24"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_32 = [slice_33, full_29, full_29, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_30 = paddle._C_ops.stack(combine_32, 0) + del combine_32 + + # pd_op.reshape: (-1x24x24x768xf32) <- (-1x-1x768xf32, 4xi64) + reshape_65 = paddle._C_ops.reshape(layer_norm_33, stack_30) + del layer_norm_33, stack_30 + + # pd_op.shape64: (4xi64) <- (-1x24x24x768xf32) + shape64_23 = paddle._C_ops.shape64(reshape_65) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_35 = paddle._C_ops.slice( + shape64_23, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_23 + + # pd_op.full: (xi64) <- () + full_30 = paddle._C_ops.full( + [], float("2"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_33 = [slice_35, full_30, full_3, full_30, full_3, full_18] + del slice_35 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_31 = paddle._C_ops.stack(combine_33, 0) + del combine_33 + + # pd_op.reshape: (-1x2x12x2x12x768xf32) <- (-1x24x24x768xf32, 6xi64) + reshape_66 = paddle._C_ops.reshape(reshape_65, stack_31) + del reshape_65, stack_31 + + # pd_op.transpose: (-1x2x2x12x12x768xf32) <- (-1x2x12x2x12x768xf32) + transpose_27 = paddle._C_ops.transpose(reshape_66, [0, 1, 3, 2, 4, 5]) + del reshape_66 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_38 = [-1, 12, 12, 768] + + # pd_op.reshape: (-1x12x12x768xf32) <- (-1x2x2x12x12x768xf32, 4xi64) + reshape_67 = paddle._C_ops.reshape(transpose_27, full_int_array_38) + del transpose_27 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_39 = [-1, 144, 768] + + # pd_op.reshape: (-1x144x768xf32) <- (-1x12x12x768xf32, 3xi64) + reshape_68 = paddle._C_ops.reshape(reshape_67, full_int_array_39) + del reshape_67 + + # pd_op.shape64: (3xi64) <- (-1x144x768xf32) + shape64_24 = paddle._C_ops.shape64(reshape_68) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_36 = paddle._C_ops.slice( + shape64_24, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_24 + + # pd_op.matmul: (-1x144x2304xf32) <- (-1x144x768xf32, 768x2304xf32) + matmul_26 = paddle._C_ops.matmul(reshape_68, parameter_244, False, False) + del parameter_244, reshape_68 + + # pd_op.add: (-1x144x2304xf32) <- (-1x144x2304xf32, 2304xf32) + add_32 = paddle._C_ops.add(matmul_26, parameter_243) + del matmul_26, parameter_243 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_34 = [slice_36, full_4, full_5, full_29, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_32 = paddle._C_ops.stack(combine_34, 0) + del combine_34 + + # pd_op.reshape: (-1x144x3x24x32xf32) <- (-1x144x2304xf32, 5xi64) + reshape_69 = paddle._C_ops.reshape(add_32, stack_32) + del add_32, stack_32 + + # pd_op.transpose: (3x-1x24x144x32xf32) <- (-1x144x3x24x32xf32) + transpose_28 = paddle._C_ops.transpose(reshape_69, [2, 0, 3, 1, 4]) + del reshape_69 + + # pd_op.slice: (-1x24x144x32xf32) <- (3x-1x24x144x32xf32, 1xi64, 1xi64) + slice_37 = paddle._C_ops.slice( + transpose_28, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x24x144x32xf32) <- (3x-1x24x144x32xf32, 1xi64, 1xi64) + slice_38 = paddle._C_ops.slice( + transpose_28, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x24x144x32xf32) <- (3x-1x24x144x32xf32, 1xi64, 1xi64) + slice_39 = paddle._C_ops.slice( + transpose_28, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_28 + + # pd_op.scale: (-1x24x144x32xf32) <- (-1x24x144x32xf32, 1xf32) + scale_4 = paddle._C_ops.scale(slice_37, full_8, float("0"), True) + del slice_37 + + # pd_op.transpose: (-1x24x32x144xf32) <- (-1x24x144x32xf32) + transpose_29 = paddle._C_ops.transpose(slice_38, [0, 1, 3, 2]) + del slice_38 + + # pd_op.matmul: (-1x24x144x144xf32) <- (-1x24x144x32xf32, -1x24x32x144xf32) + matmul_27 = paddle._C_ops.matmul(scale_4, transpose_29, False, False) + del scale_4, transpose_29 + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_70 = paddle._C_ops.reshape(data_9, full_int_array_7) + del data_9 + + # pd_op.index_select: (20736x24xf32) <- (529x24xf32, 20736xi64) + index_select_4 = paddle._C_ops.index_select(data_10, reshape_70, 0) + del data_10, reshape_70 + + # pd_op.reshape: (144x144x24xf32) <- (20736x24xf32, 3xi64) + reshape_71 = paddle._C_ops.reshape(index_select_4, full_int_array_8) + del index_select_4 + + # pd_op.transpose: (24x144x144xf32) <- (144x144x24xf32) + transpose_30 = paddle._C_ops.transpose(reshape_71, [2, 0, 1]) + del reshape_71 + + # pd_op.unsqueeze: (1x24x144x144xf32) <- (24x144x144xf32, 1xi64) + unsqueeze_12 = paddle._C_ops.unsqueeze(transpose_30, full_int_array_0) + del transpose_30 + + # pd_op.add: (-1x24x144x144xf32) <- (-1x24x144x144xf32, 1x24x144x144xf32) + add_33 = paddle._C_ops.add(matmul_27, unsqueeze_12) + del matmul_27, unsqueeze_12 + + # pd_op.softmax: (-1x24x144x144xf32) <- (-1x24x144x144xf32) + softmax_4 = paddle._C_ops.softmax(add_33, -1) + del add_33 + + # pd_op.matmul: (-1x24x144x32xf32) <- (-1x24x144x144xf32, -1x24x144x32xf32) + matmul_28 = paddle._C_ops.matmul(softmax_4, slice_39, False, False) + del slice_39, softmax_4 + + # pd_op.transpose: (-1x144x24x32xf32) <- (-1x24x144x32xf32) + transpose_31 = paddle._C_ops.transpose(matmul_28, [0, 2, 1, 3]) + del matmul_28 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_35 = [slice_36, full_4, full_18] + del slice_36 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_33 = paddle._C_ops.stack(combine_35, 0) + del combine_35 + + # pd_op.reshape: (-1x144x768xf32) <- (-1x144x24x32xf32, 3xi64) + reshape_72 = paddle._C_ops.reshape(transpose_31, stack_33) + del stack_33, transpose_31 + + # pd_op.matmul: (-1x144x768xf32) <- (-1x144x768xf32, 768x768xf32) + matmul_29 = paddle._C_ops.matmul(reshape_72, parameter_242, False, False) + del parameter_242, reshape_72 + + # pd_op.add: (-1x144x768xf32) <- (-1x144x768xf32, 768xf32) + add_34 = paddle._C_ops.add(matmul_29, parameter_241) + del matmul_29, parameter_241 + + # pd_op.reshape: (-1x12x12x768xf32) <- (-1x144x768xf32, 4xi64) + reshape_73 = paddle._C_ops.reshape(add_34, full_int_array_38) + del add_34 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_40 = [-1, 2, 2, 12, 12, 768] + + # pd_op.reshape: (-1x2x2x12x12x768xf32) <- (-1x12x12x768xf32, 6xi64) + reshape_74 = paddle._C_ops.reshape(reshape_73, full_int_array_40) + del reshape_73 + + # pd_op.transpose: (-1x2x12x2x12x768xf32) <- (-1x2x2x12x12x768xf32) + transpose_32 = paddle._C_ops.transpose(reshape_74, [0, 1, 3, 2, 4, 5]) + del reshape_74 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_41 = [-1, 24, 24, 768] + + # pd_op.reshape: (-1x24x24x768xf32) <- (-1x2x12x2x12x768xf32, 4xi64) + reshape_75 = paddle._C_ops.reshape(transpose_32, full_int_array_41) + del transpose_32 + + # pd_op.full: (xi64) <- () + full_31 = paddle._C_ops.full( + [], float("576"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_36 = [slice_33, full_31, full_18] + del slice_33 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_34 = paddle._C_ops.stack(combine_36, 0) + del combine_36 + + # pd_op.reshape: (-1x576x768xf32) <- (-1x24x24x768xf32, 3xi64) + reshape_76 = paddle._C_ops.reshape(reshape_75, stack_34) + del reshape_75, stack_34 + + # pd_op.add: (-1x576x768xf32) <- (-1x-1x768xf32, -1x576x768xf32) + add_35 = paddle._C_ops.add(matmul_25, reshape_76) + del matmul_25, reshape_76 + + # pd_op.layer_norm: (-1x576x768xf32, -1x576xf32, -1x576xf32) <- (-1x576x768xf32, 768xf32, 768xf32) + layer_norm_36, layer_norm_37, layer_norm_38 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_35, parameter_240, parameter_239, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_239, parameter_240 + + # pd_op.matmul: (-1x576x3072xf32) <- (-1x576x768xf32, 768x3072xf32) + matmul_30 = paddle._C_ops.matmul(layer_norm_36, parameter_238, False, False) + del layer_norm_36, parameter_238 + + # pd_op.add: (-1x576x3072xf32) <- (-1x576x3072xf32, 3072xf32) + add_36 = paddle._C_ops.add(matmul_30, parameter_237) + del matmul_30, parameter_237 + + # pd_op.gelu: (-1x576x3072xf32) <- (-1x576x3072xf32) + gelu_4 = paddle._C_ops.gelu(add_36, False) + del add_36 + + # pd_op.matmul: (-1x576x768xf32) <- (-1x576x3072xf32, 3072x768xf32) + matmul_31 = paddle._C_ops.matmul(gelu_4, parameter_236, False, False) + del gelu_4, parameter_236 + + # pd_op.add: (-1x576x768xf32) <- (-1x576x768xf32, 768xf32) + add_37 = paddle._C_ops.add(matmul_31, parameter_235) + del matmul_31, parameter_235 + + # pd_op.add: (-1x576x768xf32) <- (-1x576x768xf32, -1x576x768xf32) + add_38 = paddle._C_ops.add(add_35, add_37) + del add_35, add_37 + + # pd_op.shape64: (3xi64) <- (-1x576x768xf32) + shape64_25 = paddle._C_ops.shape64(add_38) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_40 = paddle._C_ops.slice( + shape64_25, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_25 + + # pd_op.layer_norm: (-1x576x768xf32, -1x576xf32, -1x576xf32) <- (-1x576x768xf32, 768xf32, 768xf32) + layer_norm_39, layer_norm_40, layer_norm_41 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_38, parameter_234, parameter_233, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_233, parameter_234 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_37 = [slice_40, full_29, full_29, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_35 = paddle._C_ops.stack(combine_37, 0) + del combine_37 + + # pd_op.reshape: (-1x24x24x768xf32) <- (-1x576x768xf32, 4xi64) + reshape_77 = paddle._C_ops.reshape(layer_norm_39, stack_35) + del layer_norm_39, stack_35 + + # pd_op.shape64: (4xi64) <- (-1x24x24x768xf32) + shape64_26 = paddle._C_ops.shape64(reshape_77) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_41 = paddle._C_ops.slice( + shape64_26, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_26 + + # pd_op.roll: (-1x24x24x768xf32) <- (-1x24x24x768xf32, 2xi64) + roll_4 = paddle._C_ops.roll(reshape_77, full_int_array_11, [1, 2]) + del reshape_77 + + # pd_op.shape64: (4xi64) <- (-1x24x24x768xf32) + shape64_27 = paddle._C_ops.shape64(roll_4) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_42 = paddle._C_ops.slice( + shape64_27, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_27 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_38 = [slice_42, full_30, full_3, full_30, full_3, full_18] + del slice_42 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_36 = paddle._C_ops.stack(combine_38, 0) + del combine_38 + + # pd_op.reshape: (-1x2x12x2x12x768xf32) <- (-1x24x24x768xf32, 6xi64) + reshape_78 = paddle._C_ops.reshape(roll_4, stack_36) + del roll_4, stack_36 + + # pd_op.transpose: (-1x2x2x12x12x768xf32) <- (-1x2x12x2x12x768xf32) + transpose_33 = paddle._C_ops.transpose(reshape_78, [0, 1, 3, 2, 4, 5]) + del reshape_78 + + # pd_op.reshape: (-1x12x12x768xf32) <- (-1x2x2x12x12x768xf32, 4xi64) + reshape_79 = paddle._C_ops.reshape(transpose_33, full_int_array_38) + del transpose_33 + + # pd_op.reshape: (-1x144x768xf32) <- (-1x12x12x768xf32, 3xi64) + reshape_80 = paddle._C_ops.reshape(reshape_79, full_int_array_39) + del reshape_79 + + # pd_op.full: (1x24x24x1xf32) <- () + full_32 = paddle._C_ops.full( + [1, 24, 24, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__28 = paddle._C_ops.set_value_( + full_32, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_32 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__29 = paddle._C_ops.set_value_( + set_value__28, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__28 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__30 = paddle._C_ops.set_value_( + set_value__29, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__29 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__31 = paddle._C_ops.set_value_( + set_value__30, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__30 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__32 = paddle._C_ops.set_value_( + set_value__31, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__31 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__33 = paddle._C_ops.set_value_( + set_value__32, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__32 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__34 = paddle._C_ops.set_value_( + set_value__33, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__33 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__35 = paddle._C_ops.set_value_( + set_value__34, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__34 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__2 = paddle._C_ops.set_value_( + set_value__35, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__35 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_42 = [1, 2, 12, 2, 12, 1] + + # pd_op.reshape: (1x2x12x2x12x1xf32) <- (1x24x24x1xf32, 6xi64) + reshape_81 = paddle._C_ops.reshape(set_value__2, full_int_array_42) + + # pd_op.transpose: (1x2x2x12x12x1xf32) <- (1x2x12x2x12x1xf32) + transpose_34 = paddle._C_ops.transpose(reshape_81, [0, 1, 3, 2, 4, 5]) + del reshape_81 + + # pd_op.reshape: (4x12x12x1xf32) <- (1x2x2x12x12x1xf32, 4xi64) + reshape_82 = paddle._C_ops.reshape(transpose_34, full_int_array_27) + del transpose_34 + + # pd_op.reshape: (4x144xf32) <- (4x12x12x1xf32, 2xi64) + reshape_83 = paddle._C_ops.reshape(reshape_82, full_int_array_28) + del reshape_82 + + # pd_op.unsqueeze: (4x1x144xf32) <- (4x144xf32, 1xi64) + unsqueeze_13 = paddle._C_ops.unsqueeze(reshape_83, full_int_array_1) + + # pd_op.unsqueeze: (4x144x1xf32) <- (4x144xf32, 1xi64) + unsqueeze_14 = paddle._C_ops.unsqueeze(reshape_83, full_int_array_5) + del reshape_83 + + # pd_op.subtract: (4x144x144xf32) <- (4x1x144xf32, 4x144x1xf32) + subtract_2 = paddle._C_ops.subtract(unsqueeze_13, unsqueeze_14) + del unsqueeze_13, unsqueeze_14 + + # pd_op.not_equal: (4x144x144xb) <- (4x144x144xf32, xf32) + not_equal_2 = paddle._C_ops.not_equal(subtract_2, full_11) + + # pd_op.full: (4x144x144xf32) <- () + full_33 = paddle._C_ops.full( + [4, 144, 144], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_4 = paddle._C_ops.where(not_equal_2, full_33, subtract_2) + del not_equal_2, subtract_2 + + # pd_op.equal: (4x144x144xb) <- (4x144x144xf32, xf32) + equal_2 = paddle._C_ops.equal(where_4, full_11) + + # pd_op.full: (4x144x144xf32) <- () + full_34 = paddle._C_ops.full( + [4, 144, 144], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_5 = paddle._C_ops.where(equal_2, full_34, where_4) + del equal_2, where_4 + + # pd_op.shape64: (3xi64) <- (-1x144x768xf32) + shape64_28 = paddle._C_ops.shape64(reshape_80) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_43 = paddle._C_ops.slice( + shape64_28, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_28 + + # pd_op.matmul: (-1x144x2304xf32) <- (-1x144x768xf32, 768x2304xf32) + matmul_32 = paddle._C_ops.matmul(reshape_80, parameter_232, False, False) + del parameter_232, reshape_80 + + # pd_op.add: (-1x144x2304xf32) <- (-1x144x2304xf32, 2304xf32) + add_39 = paddle._C_ops.add(matmul_32, parameter_231) + del matmul_32, parameter_231 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_39 = [slice_43, full_4, full_5, full_29, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_37 = paddle._C_ops.stack(combine_39, 0) + del combine_39 + + # pd_op.reshape: (-1x144x3x24x32xf32) <- (-1x144x2304xf32, 5xi64) + reshape_84 = paddle._C_ops.reshape(add_39, stack_37) + del add_39, stack_37 + + # pd_op.transpose: (3x-1x24x144x32xf32) <- (-1x144x3x24x32xf32) + transpose_35 = paddle._C_ops.transpose(reshape_84, [2, 0, 3, 1, 4]) + del reshape_84 + + # pd_op.slice: (-1x24x144x32xf32) <- (3x-1x24x144x32xf32, 1xi64, 1xi64) + slice_44 = paddle._C_ops.slice( + transpose_35, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x24x144x32xf32) <- (3x-1x24x144x32xf32, 1xi64, 1xi64) + slice_45 = paddle._C_ops.slice( + transpose_35, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x24x144x32xf32) <- (3x-1x24x144x32xf32, 1xi64, 1xi64) + slice_46 = paddle._C_ops.slice( + transpose_35, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_35 + + # pd_op.scale: (-1x24x144x32xf32) <- (-1x24x144x32xf32, 1xf32) + scale_5 = paddle._C_ops.scale(slice_44, full_8, float("0"), True) + del slice_44 + + # pd_op.transpose: (-1x24x32x144xf32) <- (-1x24x144x32xf32) + transpose_36 = paddle._C_ops.transpose(slice_45, [0, 1, 3, 2]) + del slice_45 + + # pd_op.matmul: (-1x24x144x144xf32) <- (-1x24x144x32xf32, -1x24x32x144xf32) + matmul_33 = paddle._C_ops.matmul(scale_5, transpose_36, False, False) + del scale_5, transpose_36 + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_85 = paddle._C_ops.reshape(data_11, full_int_array_7) + del data_11 + + # pd_op.index_select: (20736x24xf32) <- (529x24xf32, 20736xi64) + index_select_5 = paddle._C_ops.index_select(data_12, reshape_85, 0) + del data_12, reshape_85 + + # pd_op.reshape: (144x144x24xf32) <- (20736x24xf32, 3xi64) + reshape_86 = paddle._C_ops.reshape(index_select_5, full_int_array_8) + del index_select_5 + + # pd_op.transpose: (24x144x144xf32) <- (144x144x24xf32) + transpose_37 = paddle._C_ops.transpose(reshape_86, [2, 0, 1]) + del reshape_86 + + # pd_op.unsqueeze: (1x24x144x144xf32) <- (24x144x144xf32, 1xi64) + unsqueeze_15 = paddle._C_ops.unsqueeze(transpose_37, full_int_array_0) + del transpose_37 + + # pd_op.add: (-1x24x144x144xf32) <- (-1x24x144x144xf32, 1x24x144x144xf32) + add_40 = paddle._C_ops.add(matmul_33, unsqueeze_15) + del matmul_33, unsqueeze_15 + + # pd_op.full: (xi64) <- () + full_35 = paddle._C_ops.full( + [], float("4"), paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_2 = paddle._C_ops.floor_divide(slice_43, full_35) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_40 = [floor_divide_2, full_21, full_29, full_4, full_4] + del floor_divide_2 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_38 = paddle._C_ops.stack(combine_40, 0) + del combine_40 + + # pd_op.reshape: (-1x4x24x144x144xf32) <- (-1x24x144x144xf32, 5xi64) + reshape_87 = paddle._C_ops.reshape(add_40, stack_38) + del add_40, stack_38 + + # pd_op.unsqueeze: (4x1x144x144xf32) <- (4x144x144xf32, 1xi64) + unsqueeze_16 = paddle._C_ops.unsqueeze(where_5, full_int_array_1) + del where_5 + + # pd_op.unsqueeze: (1x4x1x144x144xf32) <- (4x1x144x144xf32, 1xi64) + unsqueeze_17 = paddle._C_ops.unsqueeze(unsqueeze_16, full_int_array_0) + del unsqueeze_16 + + # pd_op.add: (-1x4x24x144x144xf32) <- (-1x4x24x144x144xf32, 1x4x1x144x144xf32) + add_41 = paddle._C_ops.add(reshape_87, unsqueeze_17) + del reshape_87, unsqueeze_17 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_41 = [slice_43, full_29, full_4, full_4] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_39 = paddle._C_ops.stack(combine_41, 0) + del combine_41 + + # pd_op.reshape: (-1x24x144x144xf32) <- (-1x4x24x144x144xf32, 4xi64) + reshape_88 = paddle._C_ops.reshape(add_41, stack_39) + del add_41, stack_39 + + # pd_op.softmax: (-1x24x144x144xf32) <- (-1x24x144x144xf32) + softmax_5 = paddle._C_ops.softmax(reshape_88, -1) + del reshape_88 + + # pd_op.matmul: (-1x24x144x32xf32) <- (-1x24x144x144xf32, -1x24x144x32xf32) + matmul_34 = paddle._C_ops.matmul(softmax_5, slice_46, False, False) + del slice_46, softmax_5 + + # pd_op.transpose: (-1x144x24x32xf32) <- (-1x24x144x32xf32) + transpose_38 = paddle._C_ops.transpose(matmul_34, [0, 2, 1, 3]) + del matmul_34 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_42 = [slice_43, full_4, full_18] + del slice_43 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_40 = paddle._C_ops.stack(combine_42, 0) + del combine_42 + + # pd_op.reshape: (-1x144x768xf32) <- (-1x144x24x32xf32, 3xi64) + reshape_89 = paddle._C_ops.reshape(transpose_38, stack_40) + del stack_40, transpose_38 + + # pd_op.matmul: (-1x144x768xf32) <- (-1x144x768xf32, 768x768xf32) + matmul_35 = paddle._C_ops.matmul(reshape_89, parameter_230, False, False) + del parameter_230, reshape_89 + + # pd_op.add: (-1x144x768xf32) <- (-1x144x768xf32, 768xf32) + add_42 = paddle._C_ops.add(matmul_35, parameter_229) + del matmul_35, parameter_229 + + # pd_op.reshape: (-1x12x12x768xf32) <- (-1x144x768xf32, 4xi64) + reshape_90 = paddle._C_ops.reshape(add_42, full_int_array_38) + del add_42 + + # pd_op.reshape: (-1x2x2x12x12x768xf32) <- (-1x12x12x768xf32, 6xi64) + reshape_91 = paddle._C_ops.reshape(reshape_90, full_int_array_40) + del reshape_90 + + # pd_op.transpose: (-1x2x12x2x12x768xf32) <- (-1x2x2x12x12x768xf32) + transpose_39 = paddle._C_ops.transpose(reshape_91, [0, 1, 3, 2, 4, 5]) + del reshape_91 + + # pd_op.reshape: (-1x24x24x768xf32) <- (-1x2x12x2x12x768xf32, 4xi64) + reshape_92 = paddle._C_ops.reshape(transpose_39, full_int_array_41) + del transpose_39 + + # pd_op.roll: (-1x24x24x768xf32) <- (-1x24x24x768xf32, 2xi64) + roll_5 = paddle._C_ops.roll(reshape_92, full_int_array_29, [1, 2]) + del reshape_92 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_43 = [slice_40, full_31, full_18] + del slice_40 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_41 = paddle._C_ops.stack(combine_43, 0) + del combine_43 + + # pd_op.reshape: (-1x576x768xf32) <- (-1x24x24x768xf32, 3xi64) + reshape_93 = paddle._C_ops.reshape(roll_5, stack_41) + del roll_5, stack_41 + + # pd_op.add: (-1x576x768xf32) <- (-1x576x768xf32, -1x576x768xf32) + add_43 = paddle._C_ops.add(add_38, reshape_93) + del add_38, reshape_93 + + # pd_op.layer_norm: (-1x576x768xf32, -1x576xf32, -1x576xf32) <- (-1x576x768xf32, 768xf32, 768xf32) + layer_norm_42, layer_norm_43, layer_norm_44 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_43, parameter_228, parameter_227, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_227, parameter_228 + + # pd_op.matmul: (-1x576x3072xf32) <- (-1x576x768xf32, 768x3072xf32) + matmul_36 = paddle._C_ops.matmul(layer_norm_42, parameter_226, False, False) + del layer_norm_42, parameter_226 + + # pd_op.add: (-1x576x3072xf32) <- (-1x576x3072xf32, 3072xf32) + add_44 = paddle._C_ops.add(matmul_36, parameter_225) + del matmul_36, parameter_225 + + # pd_op.gelu: (-1x576x3072xf32) <- (-1x576x3072xf32) + gelu_5 = paddle._C_ops.gelu(add_44, False) + del add_44 + + # pd_op.matmul: (-1x576x768xf32) <- (-1x576x3072xf32, 3072x768xf32) + matmul_37 = paddle._C_ops.matmul(gelu_5, parameter_224, False, False) + del gelu_5, parameter_224 + + # pd_op.add: (-1x576x768xf32) <- (-1x576x768xf32, 768xf32) + add_45 = paddle._C_ops.add(matmul_37, parameter_223) + del matmul_37, parameter_223 + + # pd_op.add: (-1x576x768xf32) <- (-1x576x768xf32, -1x576x768xf32) + add_46 = paddle._C_ops.add(add_43, add_45) + del add_43, add_45 + + # pd_op.shape64: (3xi64) <- (-1x576x768xf32) + shape64_29 = paddle._C_ops.shape64(add_46) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_47 = paddle._C_ops.slice( + shape64_29, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_29 + + # pd_op.layer_norm: (-1x576x768xf32, -1x576xf32, -1x576xf32) <- (-1x576x768xf32, 768xf32, 768xf32) + layer_norm_45, layer_norm_46, layer_norm_47 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_46, parameter_222, parameter_221, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_221, parameter_222 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_44 = [slice_47, full_29, full_29, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_42 = paddle._C_ops.stack(combine_44, 0) + del combine_44 + + # pd_op.reshape: (-1x24x24x768xf32) <- (-1x576x768xf32, 4xi64) + reshape_94 = paddle._C_ops.reshape(layer_norm_45, stack_42) + del layer_norm_45, stack_42 + + # pd_op.shape64: (4xi64) <- (-1x24x24x768xf32) + shape64_30 = paddle._C_ops.shape64(reshape_94) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_48 = paddle._C_ops.slice( + shape64_30, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_30 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_45 = [slice_48, full_30, full_3, full_30, full_3, full_18] + del slice_48 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_43 = paddle._C_ops.stack(combine_45, 0) + del combine_45 + + # pd_op.reshape: (-1x2x12x2x12x768xf32) <- (-1x24x24x768xf32, 6xi64) + reshape_95 = paddle._C_ops.reshape(reshape_94, stack_43) + del reshape_94, stack_43 + + # pd_op.transpose: (-1x2x2x12x12x768xf32) <- (-1x2x12x2x12x768xf32) + transpose_40 = paddle._C_ops.transpose(reshape_95, [0, 1, 3, 2, 4, 5]) + del reshape_95 + + # pd_op.reshape: (-1x12x12x768xf32) <- (-1x2x2x12x12x768xf32, 4xi64) + reshape_96 = paddle._C_ops.reshape(transpose_40, full_int_array_38) + del transpose_40 + + # pd_op.reshape: (-1x144x768xf32) <- (-1x12x12x768xf32, 3xi64) + reshape_97 = paddle._C_ops.reshape(reshape_96, full_int_array_39) + del reshape_96 + + # pd_op.shape64: (3xi64) <- (-1x144x768xf32) + shape64_31 = paddle._C_ops.shape64(reshape_97) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_49 = paddle._C_ops.slice( + shape64_31, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_31 + + # pd_op.matmul: (-1x144x2304xf32) <- (-1x144x768xf32, 768x2304xf32) + matmul_38 = paddle._C_ops.matmul(reshape_97, parameter_220, False, False) + del parameter_220, reshape_97 + + # pd_op.add: (-1x144x2304xf32) <- (-1x144x2304xf32, 2304xf32) + add_47 = paddle._C_ops.add(matmul_38, parameter_219) + del matmul_38, parameter_219 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_46 = [slice_49, full_4, full_5, full_29, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_44 = paddle._C_ops.stack(combine_46, 0) + del combine_46 + + # pd_op.reshape: (-1x144x3x24x32xf32) <- (-1x144x2304xf32, 5xi64) + reshape_98 = paddle._C_ops.reshape(add_47, stack_44) + del add_47, stack_44 + + # pd_op.transpose: (3x-1x24x144x32xf32) <- (-1x144x3x24x32xf32) + transpose_41 = paddle._C_ops.transpose(reshape_98, [2, 0, 3, 1, 4]) + del reshape_98 + + # pd_op.slice: (-1x24x144x32xf32) <- (3x-1x24x144x32xf32, 1xi64, 1xi64) + slice_50 = paddle._C_ops.slice( + transpose_41, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x24x144x32xf32) <- (3x-1x24x144x32xf32, 1xi64, 1xi64) + slice_51 = paddle._C_ops.slice( + transpose_41, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x24x144x32xf32) <- (3x-1x24x144x32xf32, 1xi64, 1xi64) + slice_52 = paddle._C_ops.slice( + transpose_41, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_41 + + # pd_op.scale: (-1x24x144x32xf32) <- (-1x24x144x32xf32, 1xf32) + scale_6 = paddle._C_ops.scale(slice_50, full_8, float("0"), True) + del slice_50 + + # pd_op.transpose: (-1x24x32x144xf32) <- (-1x24x144x32xf32) + transpose_42 = paddle._C_ops.transpose(slice_51, [0, 1, 3, 2]) + del slice_51 + + # pd_op.matmul: (-1x24x144x144xf32) <- (-1x24x144x32xf32, -1x24x32x144xf32) + matmul_39 = paddle._C_ops.matmul(scale_6, transpose_42, False, False) + del scale_6, transpose_42 + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_99 = paddle._C_ops.reshape(data_13, full_int_array_7) + del data_13 + + # pd_op.index_select: (20736x24xf32) <- (529x24xf32, 20736xi64) + index_select_6 = paddle._C_ops.index_select(data_14, reshape_99, 0) + del data_14, reshape_99 + + # pd_op.reshape: (144x144x24xf32) <- (20736x24xf32, 3xi64) + reshape_100 = paddle._C_ops.reshape(index_select_6, full_int_array_8) + del index_select_6 + + # pd_op.transpose: (24x144x144xf32) <- (144x144x24xf32) + transpose_43 = paddle._C_ops.transpose(reshape_100, [2, 0, 1]) + del reshape_100 + + # pd_op.unsqueeze: (1x24x144x144xf32) <- (24x144x144xf32, 1xi64) + unsqueeze_18 = paddle._C_ops.unsqueeze(transpose_43, full_int_array_0) + del transpose_43 + + # pd_op.add: (-1x24x144x144xf32) <- (-1x24x144x144xf32, 1x24x144x144xf32) + add_48 = paddle._C_ops.add(matmul_39, unsqueeze_18) + del matmul_39, unsqueeze_18 + + # pd_op.softmax: (-1x24x144x144xf32) <- (-1x24x144x144xf32) + softmax_6 = paddle._C_ops.softmax(add_48, -1) + del add_48 + + # pd_op.matmul: (-1x24x144x32xf32) <- (-1x24x144x144xf32, -1x24x144x32xf32) + matmul_40 = paddle._C_ops.matmul(softmax_6, slice_52, False, False) + del slice_52, softmax_6 + + # pd_op.transpose: (-1x144x24x32xf32) <- (-1x24x144x32xf32) + transpose_44 = paddle._C_ops.transpose(matmul_40, [0, 2, 1, 3]) + del matmul_40 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_47 = [slice_49, full_4, full_18] + del slice_49 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_45 = paddle._C_ops.stack(combine_47, 0) + del combine_47 + + # pd_op.reshape: (-1x144x768xf32) <- (-1x144x24x32xf32, 3xi64) + reshape_101 = paddle._C_ops.reshape(transpose_44, stack_45) + del stack_45, transpose_44 + + # pd_op.matmul: (-1x144x768xf32) <- (-1x144x768xf32, 768x768xf32) + matmul_41 = paddle._C_ops.matmul(reshape_101, parameter_218, False, False) + del parameter_218, reshape_101 + + # pd_op.add: (-1x144x768xf32) <- (-1x144x768xf32, 768xf32) + add_49 = paddle._C_ops.add(matmul_41, parameter_217) + del matmul_41, parameter_217 + + # pd_op.reshape: (-1x12x12x768xf32) <- (-1x144x768xf32, 4xi64) + reshape_102 = paddle._C_ops.reshape(add_49, full_int_array_38) + del add_49 + + # pd_op.reshape: (-1x2x2x12x12x768xf32) <- (-1x12x12x768xf32, 6xi64) + reshape_103 = paddle._C_ops.reshape(reshape_102, full_int_array_40) + del reshape_102 + + # pd_op.transpose: (-1x2x12x2x12x768xf32) <- (-1x2x2x12x12x768xf32) + transpose_45 = paddle._C_ops.transpose(reshape_103, [0, 1, 3, 2, 4, 5]) + del reshape_103 + + # pd_op.reshape: (-1x24x24x768xf32) <- (-1x2x12x2x12x768xf32, 4xi64) + reshape_104 = paddle._C_ops.reshape(transpose_45, full_int_array_41) + del transpose_45 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_48 = [slice_47, full_31, full_18] + del slice_47 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_46 = paddle._C_ops.stack(combine_48, 0) + del combine_48 + + # pd_op.reshape: (-1x576x768xf32) <- (-1x24x24x768xf32, 3xi64) + reshape_105 = paddle._C_ops.reshape(reshape_104, stack_46) + del reshape_104, stack_46 + + # pd_op.add: (-1x576x768xf32) <- (-1x576x768xf32, -1x576x768xf32) + add_50 = paddle._C_ops.add(add_46, reshape_105) + del add_46, reshape_105 + + # pd_op.layer_norm: (-1x576x768xf32, -1x576xf32, -1x576xf32) <- (-1x576x768xf32, 768xf32, 768xf32) + layer_norm_48, layer_norm_49, layer_norm_50 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_50, parameter_216, parameter_215, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_215, parameter_216 + + # pd_op.matmul: (-1x576x3072xf32) <- (-1x576x768xf32, 768x3072xf32) + matmul_42 = paddle._C_ops.matmul(layer_norm_48, parameter_214, False, False) + del layer_norm_48, parameter_214 + + # pd_op.add: (-1x576x3072xf32) <- (-1x576x3072xf32, 3072xf32) + add_51 = paddle._C_ops.add(matmul_42, parameter_213) + del matmul_42, parameter_213 + + # pd_op.gelu: (-1x576x3072xf32) <- (-1x576x3072xf32) + gelu_6 = paddle._C_ops.gelu(add_51, False) + del add_51 + + # pd_op.matmul: (-1x576x768xf32) <- (-1x576x3072xf32, 3072x768xf32) + matmul_43 = paddle._C_ops.matmul(gelu_6, parameter_212, False, False) + del gelu_6, parameter_212 + + # pd_op.add: (-1x576x768xf32) <- (-1x576x768xf32, 768xf32) + add_52 = paddle._C_ops.add(matmul_43, parameter_211) + del matmul_43, parameter_211 + + # pd_op.add: (-1x576x768xf32) <- (-1x576x768xf32, -1x576x768xf32) + add_53 = paddle._C_ops.add(add_50, add_52) + del add_50, add_52 + + # pd_op.shape64: (3xi64) <- (-1x576x768xf32) + shape64_32 = paddle._C_ops.shape64(add_53) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_53 = paddle._C_ops.slice( + shape64_32, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_32 + + # pd_op.layer_norm: (-1x576x768xf32, -1x576xf32, -1x576xf32) <- (-1x576x768xf32, 768xf32, 768xf32) + layer_norm_51, layer_norm_52, layer_norm_53 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_53, parameter_210, parameter_209, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_209, parameter_210 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_49 = [slice_53, full_29, full_29, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_47 = paddle._C_ops.stack(combine_49, 0) + del combine_49 + + # pd_op.reshape: (-1x24x24x768xf32) <- (-1x576x768xf32, 4xi64) + reshape_106 = paddle._C_ops.reshape(layer_norm_51, stack_47) + del layer_norm_51, stack_47 + + # pd_op.shape64: (4xi64) <- (-1x24x24x768xf32) + shape64_33 = paddle._C_ops.shape64(reshape_106) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_54 = paddle._C_ops.slice( + shape64_33, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_33 + + # pd_op.roll: (-1x24x24x768xf32) <- (-1x24x24x768xf32, 2xi64) + roll_6 = paddle._C_ops.roll(reshape_106, full_int_array_11, [1, 2]) + del reshape_106 + + # pd_op.shape64: (4xi64) <- (-1x24x24x768xf32) + shape64_34 = paddle._C_ops.shape64(roll_6) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_55 = paddle._C_ops.slice( + shape64_34, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_34 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_50 = [slice_55, full_30, full_3, full_30, full_3, full_18] + del slice_55 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_48 = paddle._C_ops.stack(combine_50, 0) + del combine_50 + + # pd_op.reshape: (-1x2x12x2x12x768xf32) <- (-1x24x24x768xf32, 6xi64) + reshape_107 = paddle._C_ops.reshape(roll_6, stack_48) + del roll_6, stack_48 + + # pd_op.transpose: (-1x2x2x12x12x768xf32) <- (-1x2x12x2x12x768xf32) + transpose_46 = paddle._C_ops.transpose(reshape_107, [0, 1, 3, 2, 4, 5]) + del reshape_107 + + # pd_op.reshape: (-1x12x12x768xf32) <- (-1x2x2x12x12x768xf32, 4xi64) + reshape_108 = paddle._C_ops.reshape(transpose_46, full_int_array_38) + del transpose_46 + + # pd_op.reshape: (-1x144x768xf32) <- (-1x12x12x768xf32, 3xi64) + reshape_109 = paddle._C_ops.reshape(reshape_108, full_int_array_39) + del reshape_108 + + # pd_op.full: (1x24x24x1xf32) <- () + full_36 = paddle._C_ops.full( + [1, 24, 24, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__36 = paddle._C_ops.set_value_( + full_36, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_36 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__37 = paddle._C_ops.set_value_( + set_value__36, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__36 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__38 = paddle._C_ops.set_value_( + set_value__37, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__37 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__39 = paddle._C_ops.set_value_( + set_value__38, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__38 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__40 = paddle._C_ops.set_value_( + set_value__39, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__39 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__41 = paddle._C_ops.set_value_( + set_value__40, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__40 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__42 = paddle._C_ops.set_value_( + set_value__41, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__41 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__43 = paddle._C_ops.set_value_( + set_value__42, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__42 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__3 = paddle._C_ops.set_value_( + set_value__43, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__43 + + # pd_op.reshape: (1x2x12x2x12x1xf32) <- (1x24x24x1xf32, 6xi64) + reshape_110 = paddle._C_ops.reshape(set_value__3, full_int_array_42) + + # pd_op.transpose: (1x2x2x12x12x1xf32) <- (1x2x12x2x12x1xf32) + transpose_47 = paddle._C_ops.transpose(reshape_110, [0, 1, 3, 2, 4, 5]) + del reshape_110 + + # pd_op.reshape: (4x12x12x1xf32) <- (1x2x2x12x12x1xf32, 4xi64) + reshape_111 = paddle._C_ops.reshape(transpose_47, full_int_array_27) + del transpose_47 + + # pd_op.reshape: (4x144xf32) <- (4x12x12x1xf32, 2xi64) + reshape_112 = paddle._C_ops.reshape(reshape_111, full_int_array_28) + del reshape_111 + + # pd_op.unsqueeze: (4x1x144xf32) <- (4x144xf32, 1xi64) + unsqueeze_19 = paddle._C_ops.unsqueeze(reshape_112, full_int_array_1) + + # pd_op.unsqueeze: (4x144x1xf32) <- (4x144xf32, 1xi64) + unsqueeze_20 = paddle._C_ops.unsqueeze(reshape_112, full_int_array_5) + del reshape_112 + + # pd_op.subtract: (4x144x144xf32) <- (4x1x144xf32, 4x144x1xf32) + subtract_3 = paddle._C_ops.subtract(unsqueeze_19, unsqueeze_20) + del unsqueeze_19, unsqueeze_20 + + # pd_op.not_equal: (4x144x144xb) <- (4x144x144xf32, xf32) + not_equal_3 = paddle._C_ops.not_equal(subtract_3, full_11) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_6 = paddle._C_ops.where(not_equal_3, full_33, subtract_3) + del not_equal_3, subtract_3 + + # pd_op.equal: (4x144x144xb) <- (4x144x144xf32, xf32) + equal_3 = paddle._C_ops.equal(where_6, full_11) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_7 = paddle._C_ops.where(equal_3, full_34, where_6) + del equal_3, where_6 + + # pd_op.shape64: (3xi64) <- (-1x144x768xf32) + shape64_35 = paddle._C_ops.shape64(reshape_109) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_56 = paddle._C_ops.slice( + shape64_35, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_35 + + # pd_op.matmul: (-1x144x2304xf32) <- (-1x144x768xf32, 768x2304xf32) + matmul_44 = paddle._C_ops.matmul(reshape_109, parameter_208, False, False) + del parameter_208, reshape_109 + + # pd_op.add: (-1x144x2304xf32) <- (-1x144x2304xf32, 2304xf32) + add_54 = paddle._C_ops.add(matmul_44, parameter_207) + del matmul_44, parameter_207 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_51 = [slice_56, full_4, full_5, full_29, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_49 = paddle._C_ops.stack(combine_51, 0) + del combine_51 + + # pd_op.reshape: (-1x144x3x24x32xf32) <- (-1x144x2304xf32, 5xi64) + reshape_113 = paddle._C_ops.reshape(add_54, stack_49) + del add_54, stack_49 + + # pd_op.transpose: (3x-1x24x144x32xf32) <- (-1x144x3x24x32xf32) + transpose_48 = paddle._C_ops.transpose(reshape_113, [2, 0, 3, 1, 4]) + del reshape_113 + + # pd_op.slice: (-1x24x144x32xf32) <- (3x-1x24x144x32xf32, 1xi64, 1xi64) + slice_57 = paddle._C_ops.slice( + transpose_48, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x24x144x32xf32) <- (3x-1x24x144x32xf32, 1xi64, 1xi64) + slice_58 = paddle._C_ops.slice( + transpose_48, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x24x144x32xf32) <- (3x-1x24x144x32xf32, 1xi64, 1xi64) + slice_59 = paddle._C_ops.slice( + transpose_48, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_48 + + # pd_op.scale: (-1x24x144x32xf32) <- (-1x24x144x32xf32, 1xf32) + scale_7 = paddle._C_ops.scale(slice_57, full_8, float("0"), True) + del slice_57 + + # pd_op.transpose: (-1x24x32x144xf32) <- (-1x24x144x32xf32) + transpose_49 = paddle._C_ops.transpose(slice_58, [0, 1, 3, 2]) + del slice_58 + + # pd_op.matmul: (-1x24x144x144xf32) <- (-1x24x144x32xf32, -1x24x32x144xf32) + matmul_45 = paddle._C_ops.matmul(scale_7, transpose_49, False, False) + del scale_7, transpose_49 + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_114 = paddle._C_ops.reshape(data_15, full_int_array_7) + del data_15 + + # pd_op.index_select: (20736x24xf32) <- (529x24xf32, 20736xi64) + index_select_7 = paddle._C_ops.index_select(data_16, reshape_114, 0) + del data_16, reshape_114 + + # pd_op.reshape: (144x144x24xf32) <- (20736x24xf32, 3xi64) + reshape_115 = paddle._C_ops.reshape(index_select_7, full_int_array_8) + del index_select_7 + + # pd_op.transpose: (24x144x144xf32) <- (144x144x24xf32) + transpose_50 = paddle._C_ops.transpose(reshape_115, [2, 0, 1]) + del reshape_115 + + # pd_op.unsqueeze: (1x24x144x144xf32) <- (24x144x144xf32, 1xi64) + unsqueeze_21 = paddle._C_ops.unsqueeze(transpose_50, full_int_array_0) + del transpose_50 + + # pd_op.add: (-1x24x144x144xf32) <- (-1x24x144x144xf32, 1x24x144x144xf32) + add_55 = paddle._C_ops.add(matmul_45, unsqueeze_21) + del matmul_45, unsqueeze_21 + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_3 = paddle._C_ops.floor_divide(slice_56, full_35) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_52 = [floor_divide_3, full_21, full_29, full_4, full_4] + del floor_divide_3 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_50 = paddle._C_ops.stack(combine_52, 0) + del combine_52 + + # pd_op.reshape: (-1x4x24x144x144xf32) <- (-1x24x144x144xf32, 5xi64) + reshape_116 = paddle._C_ops.reshape(add_55, stack_50) + del add_55, stack_50 + + # pd_op.unsqueeze: (4x1x144x144xf32) <- (4x144x144xf32, 1xi64) + unsqueeze_22 = paddle._C_ops.unsqueeze(where_7, full_int_array_1) + del where_7 + + # pd_op.unsqueeze: (1x4x1x144x144xf32) <- (4x1x144x144xf32, 1xi64) + unsqueeze_23 = paddle._C_ops.unsqueeze(unsqueeze_22, full_int_array_0) + del unsqueeze_22 + + # pd_op.add: (-1x4x24x144x144xf32) <- (-1x4x24x144x144xf32, 1x4x1x144x144xf32) + add_56 = paddle._C_ops.add(reshape_116, unsqueeze_23) + del reshape_116, unsqueeze_23 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_53 = [slice_56, full_29, full_4, full_4] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_51 = paddle._C_ops.stack(combine_53, 0) + del combine_53 + + # pd_op.reshape: (-1x24x144x144xf32) <- (-1x4x24x144x144xf32, 4xi64) + reshape_117 = paddle._C_ops.reshape(add_56, stack_51) + del add_56, stack_51 + + # pd_op.softmax: (-1x24x144x144xf32) <- (-1x24x144x144xf32) + softmax_7 = paddle._C_ops.softmax(reshape_117, -1) + del reshape_117 + + # pd_op.matmul: (-1x24x144x32xf32) <- (-1x24x144x144xf32, -1x24x144x32xf32) + matmul_46 = paddle._C_ops.matmul(softmax_7, slice_59, False, False) + del slice_59, softmax_7 + + # pd_op.transpose: (-1x144x24x32xf32) <- (-1x24x144x32xf32) + transpose_51 = paddle._C_ops.transpose(matmul_46, [0, 2, 1, 3]) + del matmul_46 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_54 = [slice_56, full_4, full_18] + del slice_56 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_52 = paddle._C_ops.stack(combine_54, 0) + del combine_54 + + # pd_op.reshape: (-1x144x768xf32) <- (-1x144x24x32xf32, 3xi64) + reshape_118 = paddle._C_ops.reshape(transpose_51, stack_52) + del stack_52, transpose_51 + + # pd_op.matmul: (-1x144x768xf32) <- (-1x144x768xf32, 768x768xf32) + matmul_47 = paddle._C_ops.matmul(reshape_118, parameter_206, False, False) + del parameter_206, reshape_118 + + # pd_op.add: (-1x144x768xf32) <- (-1x144x768xf32, 768xf32) + add_57 = paddle._C_ops.add(matmul_47, parameter_205) + del matmul_47, parameter_205 + + # pd_op.reshape: (-1x12x12x768xf32) <- (-1x144x768xf32, 4xi64) + reshape_119 = paddle._C_ops.reshape(add_57, full_int_array_38) + del add_57 + + # pd_op.reshape: (-1x2x2x12x12x768xf32) <- (-1x12x12x768xf32, 6xi64) + reshape_120 = paddle._C_ops.reshape(reshape_119, full_int_array_40) + del reshape_119 + + # pd_op.transpose: (-1x2x12x2x12x768xf32) <- (-1x2x2x12x12x768xf32) + transpose_52 = paddle._C_ops.transpose(reshape_120, [0, 1, 3, 2, 4, 5]) + del reshape_120 + + # pd_op.reshape: (-1x24x24x768xf32) <- (-1x2x12x2x12x768xf32, 4xi64) + reshape_121 = paddle._C_ops.reshape(transpose_52, full_int_array_41) + del transpose_52 + + # pd_op.roll: (-1x24x24x768xf32) <- (-1x24x24x768xf32, 2xi64) + roll_7 = paddle._C_ops.roll(reshape_121, full_int_array_29, [1, 2]) + del reshape_121 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_55 = [slice_53, full_31, full_18] + del slice_53 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_53 = paddle._C_ops.stack(combine_55, 0) + del combine_55 + + # pd_op.reshape: (-1x576x768xf32) <- (-1x24x24x768xf32, 3xi64) + reshape_122 = paddle._C_ops.reshape(roll_7, stack_53) + del roll_7, stack_53 + + # pd_op.add: (-1x576x768xf32) <- (-1x576x768xf32, -1x576x768xf32) + add_58 = paddle._C_ops.add(add_53, reshape_122) + del add_53, reshape_122 + + # pd_op.layer_norm: (-1x576x768xf32, -1x576xf32, -1x576xf32) <- (-1x576x768xf32, 768xf32, 768xf32) + layer_norm_54, layer_norm_55, layer_norm_56 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_58, parameter_204, parameter_203, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_203, parameter_204 + + # pd_op.matmul: (-1x576x3072xf32) <- (-1x576x768xf32, 768x3072xf32) + matmul_48 = paddle._C_ops.matmul(layer_norm_54, parameter_202, False, False) + del layer_norm_54, parameter_202 + + # pd_op.add: (-1x576x3072xf32) <- (-1x576x3072xf32, 3072xf32) + add_59 = paddle._C_ops.add(matmul_48, parameter_201) + del matmul_48, parameter_201 + + # pd_op.gelu: (-1x576x3072xf32) <- (-1x576x3072xf32) + gelu_7 = paddle._C_ops.gelu(add_59, False) + del add_59 + + # pd_op.matmul: (-1x576x768xf32) <- (-1x576x3072xf32, 3072x768xf32) + matmul_49 = paddle._C_ops.matmul(gelu_7, parameter_200, False, False) + del gelu_7, parameter_200 + + # pd_op.add: (-1x576x768xf32) <- (-1x576x768xf32, 768xf32) + add_60 = paddle._C_ops.add(matmul_49, parameter_199) + del matmul_49, parameter_199 + + # pd_op.add: (-1x576x768xf32) <- (-1x576x768xf32, -1x576x768xf32) + add_61 = paddle._C_ops.add(add_58, add_60) + del add_58, add_60 + + # pd_op.shape64: (3xi64) <- (-1x576x768xf32) + shape64_36 = paddle._C_ops.shape64(add_61) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_60 = paddle._C_ops.slice( + shape64_36, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_36 + + # pd_op.layer_norm: (-1x576x768xf32, -1x576xf32, -1x576xf32) <- (-1x576x768xf32, 768xf32, 768xf32) + layer_norm_57, layer_norm_58, layer_norm_59 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_61, parameter_198, parameter_197, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_197, parameter_198 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_56 = [slice_60, full_29, full_29, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_54 = paddle._C_ops.stack(combine_56, 0) + del combine_56 + + # pd_op.reshape: (-1x24x24x768xf32) <- (-1x576x768xf32, 4xi64) + reshape_123 = paddle._C_ops.reshape(layer_norm_57, stack_54) + del layer_norm_57, stack_54 + + # pd_op.shape64: (4xi64) <- (-1x24x24x768xf32) + shape64_37 = paddle._C_ops.shape64(reshape_123) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_61 = paddle._C_ops.slice( + shape64_37, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_37 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_57 = [slice_61, full_30, full_3, full_30, full_3, full_18] + del slice_61 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_55 = paddle._C_ops.stack(combine_57, 0) + del combine_57 + + # pd_op.reshape: (-1x2x12x2x12x768xf32) <- (-1x24x24x768xf32, 6xi64) + reshape_124 = paddle._C_ops.reshape(reshape_123, stack_55) + del reshape_123, stack_55 + + # pd_op.transpose: (-1x2x2x12x12x768xf32) <- (-1x2x12x2x12x768xf32) + transpose_53 = paddle._C_ops.transpose(reshape_124, [0, 1, 3, 2, 4, 5]) + del reshape_124 + + # pd_op.reshape: (-1x12x12x768xf32) <- (-1x2x2x12x12x768xf32, 4xi64) + reshape_125 = paddle._C_ops.reshape(transpose_53, full_int_array_38) + del transpose_53 + + # pd_op.reshape: (-1x144x768xf32) <- (-1x12x12x768xf32, 3xi64) + reshape_126 = paddle._C_ops.reshape(reshape_125, full_int_array_39) + del reshape_125 + + # pd_op.shape64: (3xi64) <- (-1x144x768xf32) + shape64_38 = paddle._C_ops.shape64(reshape_126) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_62 = paddle._C_ops.slice( + shape64_38, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_38 + + # pd_op.matmul: (-1x144x2304xf32) <- (-1x144x768xf32, 768x2304xf32) + matmul_50 = paddle._C_ops.matmul(reshape_126, parameter_196, False, False) + del parameter_196, reshape_126 + + # pd_op.add: (-1x144x2304xf32) <- (-1x144x2304xf32, 2304xf32) + add_62 = paddle._C_ops.add(matmul_50, parameter_195) + del matmul_50, parameter_195 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_58 = [slice_62, full_4, full_5, full_29, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_56 = paddle._C_ops.stack(combine_58, 0) + del combine_58 + + # pd_op.reshape: (-1x144x3x24x32xf32) <- (-1x144x2304xf32, 5xi64) + reshape_127 = paddle._C_ops.reshape(add_62, stack_56) + del add_62, stack_56 + + # pd_op.transpose: (3x-1x24x144x32xf32) <- (-1x144x3x24x32xf32) + transpose_54 = paddle._C_ops.transpose(reshape_127, [2, 0, 3, 1, 4]) + del reshape_127 + + # pd_op.slice: (-1x24x144x32xf32) <- (3x-1x24x144x32xf32, 1xi64, 1xi64) + slice_63 = paddle._C_ops.slice( + transpose_54, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x24x144x32xf32) <- (3x-1x24x144x32xf32, 1xi64, 1xi64) + slice_64 = paddle._C_ops.slice( + transpose_54, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x24x144x32xf32) <- (3x-1x24x144x32xf32, 1xi64, 1xi64) + slice_65 = paddle._C_ops.slice( + transpose_54, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_54 + + # pd_op.scale: (-1x24x144x32xf32) <- (-1x24x144x32xf32, 1xf32) + scale_8 = paddle._C_ops.scale(slice_63, full_8, float("0"), True) + del slice_63 + + # pd_op.transpose: (-1x24x32x144xf32) <- (-1x24x144x32xf32) + transpose_55 = paddle._C_ops.transpose(slice_64, [0, 1, 3, 2]) + del slice_64 + + # pd_op.matmul: (-1x24x144x144xf32) <- (-1x24x144x32xf32, -1x24x32x144xf32) + matmul_51 = paddle._C_ops.matmul(scale_8, transpose_55, False, False) + del scale_8, transpose_55 + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_128 = paddle._C_ops.reshape(data_17, full_int_array_7) + del data_17 + + # pd_op.index_select: (20736x24xf32) <- (529x24xf32, 20736xi64) + index_select_8 = paddle._C_ops.index_select(data_18, reshape_128, 0) + del data_18, reshape_128 + + # pd_op.reshape: (144x144x24xf32) <- (20736x24xf32, 3xi64) + reshape_129 = paddle._C_ops.reshape(index_select_8, full_int_array_8) + del index_select_8 + + # pd_op.transpose: (24x144x144xf32) <- (144x144x24xf32) + transpose_56 = paddle._C_ops.transpose(reshape_129, [2, 0, 1]) + del reshape_129 + + # pd_op.unsqueeze: (1x24x144x144xf32) <- (24x144x144xf32, 1xi64) + unsqueeze_24 = paddle._C_ops.unsqueeze(transpose_56, full_int_array_0) + del transpose_56 + + # pd_op.add: (-1x24x144x144xf32) <- (-1x24x144x144xf32, 1x24x144x144xf32) + add_63 = paddle._C_ops.add(matmul_51, unsqueeze_24) + del matmul_51, unsqueeze_24 + + # pd_op.softmax: (-1x24x144x144xf32) <- (-1x24x144x144xf32) + softmax_8 = paddle._C_ops.softmax(add_63, -1) + del add_63 + + # pd_op.matmul: (-1x24x144x32xf32) <- (-1x24x144x144xf32, -1x24x144x32xf32) + matmul_52 = paddle._C_ops.matmul(softmax_8, slice_65, False, False) + del slice_65, softmax_8 + + # pd_op.transpose: (-1x144x24x32xf32) <- (-1x24x144x32xf32) + transpose_57 = paddle._C_ops.transpose(matmul_52, [0, 2, 1, 3]) + del matmul_52 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_59 = [slice_62, full_4, full_18] + del slice_62 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_57 = paddle._C_ops.stack(combine_59, 0) + del combine_59 + + # pd_op.reshape: (-1x144x768xf32) <- (-1x144x24x32xf32, 3xi64) + reshape_130 = paddle._C_ops.reshape(transpose_57, stack_57) + del stack_57, transpose_57 + + # pd_op.matmul: (-1x144x768xf32) <- (-1x144x768xf32, 768x768xf32) + matmul_53 = paddle._C_ops.matmul(reshape_130, parameter_194, False, False) + del parameter_194, reshape_130 + + # pd_op.add: (-1x144x768xf32) <- (-1x144x768xf32, 768xf32) + add_64 = paddle._C_ops.add(matmul_53, parameter_193) + del matmul_53, parameter_193 + + # pd_op.reshape: (-1x12x12x768xf32) <- (-1x144x768xf32, 4xi64) + reshape_131 = paddle._C_ops.reshape(add_64, full_int_array_38) + del add_64 + + # pd_op.reshape: (-1x2x2x12x12x768xf32) <- (-1x12x12x768xf32, 6xi64) + reshape_132 = paddle._C_ops.reshape(reshape_131, full_int_array_40) + del reshape_131 + + # pd_op.transpose: (-1x2x12x2x12x768xf32) <- (-1x2x2x12x12x768xf32) + transpose_58 = paddle._C_ops.transpose(reshape_132, [0, 1, 3, 2, 4, 5]) + del reshape_132 + + # pd_op.reshape: (-1x24x24x768xf32) <- (-1x2x12x2x12x768xf32, 4xi64) + reshape_133 = paddle._C_ops.reshape(transpose_58, full_int_array_41) + del transpose_58 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_60 = [slice_60, full_31, full_18] + del slice_60 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_58 = paddle._C_ops.stack(combine_60, 0) + del combine_60 + + # pd_op.reshape: (-1x576x768xf32) <- (-1x24x24x768xf32, 3xi64) + reshape_134 = paddle._C_ops.reshape(reshape_133, stack_58) + del reshape_133, stack_58 + + # pd_op.add: (-1x576x768xf32) <- (-1x576x768xf32, -1x576x768xf32) + add_65 = paddle._C_ops.add(add_61, reshape_134) + del add_61, reshape_134 + + # pd_op.layer_norm: (-1x576x768xf32, -1x576xf32, -1x576xf32) <- (-1x576x768xf32, 768xf32, 768xf32) + layer_norm_60, layer_norm_61, layer_norm_62 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_65, parameter_192, parameter_191, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_191, parameter_192 + + # pd_op.matmul: (-1x576x3072xf32) <- (-1x576x768xf32, 768x3072xf32) + matmul_54 = paddle._C_ops.matmul(layer_norm_60, parameter_190, False, False) + del layer_norm_60, parameter_190 + + # pd_op.add: (-1x576x3072xf32) <- (-1x576x3072xf32, 3072xf32) + add_66 = paddle._C_ops.add(matmul_54, parameter_189) + del matmul_54, parameter_189 + + # pd_op.gelu: (-1x576x3072xf32) <- (-1x576x3072xf32) + gelu_8 = paddle._C_ops.gelu(add_66, False) + del add_66 + + # pd_op.matmul: (-1x576x768xf32) <- (-1x576x3072xf32, 3072x768xf32) + matmul_55 = paddle._C_ops.matmul(gelu_8, parameter_188, False, False) + del gelu_8, parameter_188 + + # pd_op.add: (-1x576x768xf32) <- (-1x576x768xf32, 768xf32) + add_67 = paddle._C_ops.add(matmul_55, parameter_187) + del matmul_55, parameter_187 + + # pd_op.add: (-1x576x768xf32) <- (-1x576x768xf32, -1x576x768xf32) + add_68 = paddle._C_ops.add(add_65, add_67) + del add_65, add_67 + + # pd_op.shape64: (3xi64) <- (-1x576x768xf32) + shape64_39 = paddle._C_ops.shape64(add_68) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_66 = paddle._C_ops.slice( + shape64_39, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_39 + + # pd_op.layer_norm: (-1x576x768xf32, -1x576xf32, -1x576xf32) <- (-1x576x768xf32, 768xf32, 768xf32) + layer_norm_63, layer_norm_64, layer_norm_65 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_68, parameter_186, parameter_185, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_185, parameter_186 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_61 = [slice_66, full_29, full_29, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_59 = paddle._C_ops.stack(combine_61, 0) + del combine_61 + + # pd_op.reshape: (-1x24x24x768xf32) <- (-1x576x768xf32, 4xi64) + reshape_135 = paddle._C_ops.reshape(layer_norm_63, stack_59) + del layer_norm_63, stack_59 + + # pd_op.shape64: (4xi64) <- (-1x24x24x768xf32) + shape64_40 = paddle._C_ops.shape64(reshape_135) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_67 = paddle._C_ops.slice( + shape64_40, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_40 + + # pd_op.roll: (-1x24x24x768xf32) <- (-1x24x24x768xf32, 2xi64) + roll_8 = paddle._C_ops.roll(reshape_135, full_int_array_11, [1, 2]) + del reshape_135 + + # pd_op.shape64: (4xi64) <- (-1x24x24x768xf32) + shape64_41 = paddle._C_ops.shape64(roll_8) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_68 = paddle._C_ops.slice( + shape64_41, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_41 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_62 = [slice_68, full_30, full_3, full_30, full_3, full_18] + del slice_68 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_60 = paddle._C_ops.stack(combine_62, 0) + del combine_62 + + # pd_op.reshape: (-1x2x12x2x12x768xf32) <- (-1x24x24x768xf32, 6xi64) + reshape_136 = paddle._C_ops.reshape(roll_8, stack_60) + del roll_8, stack_60 + + # pd_op.transpose: (-1x2x2x12x12x768xf32) <- (-1x2x12x2x12x768xf32) + transpose_59 = paddle._C_ops.transpose(reshape_136, [0, 1, 3, 2, 4, 5]) + del reshape_136 + + # pd_op.reshape: (-1x12x12x768xf32) <- (-1x2x2x12x12x768xf32, 4xi64) + reshape_137 = paddle._C_ops.reshape(transpose_59, full_int_array_38) + del transpose_59 + + # pd_op.reshape: (-1x144x768xf32) <- (-1x12x12x768xf32, 3xi64) + reshape_138 = paddle._C_ops.reshape(reshape_137, full_int_array_39) + del reshape_137 + + # pd_op.full: (1x24x24x1xf32) <- () + full_37 = paddle._C_ops.full( + [1, 24, 24, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__44 = paddle._C_ops.set_value_( + full_37, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_37 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__45 = paddle._C_ops.set_value_( + set_value__44, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__44 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__46 = paddle._C_ops.set_value_( + set_value__45, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__45 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__47 = paddle._C_ops.set_value_( + set_value__46, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__46 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__48 = paddle._C_ops.set_value_( + set_value__47, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__47 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__49 = paddle._C_ops.set_value_( + set_value__48, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__48 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__50 = paddle._C_ops.set_value_( + set_value__49, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__49 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__51 = paddle._C_ops.set_value_( + set_value__50, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__50 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__4 = paddle._C_ops.set_value_( + set_value__51, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__51 + + # pd_op.reshape: (1x2x12x2x12x1xf32) <- (1x24x24x1xf32, 6xi64) + reshape_139 = paddle._C_ops.reshape(set_value__4, full_int_array_42) + + # pd_op.transpose: (1x2x2x12x12x1xf32) <- (1x2x12x2x12x1xf32) + transpose_60 = paddle._C_ops.transpose(reshape_139, [0, 1, 3, 2, 4, 5]) + del reshape_139 + + # pd_op.reshape: (4x12x12x1xf32) <- (1x2x2x12x12x1xf32, 4xi64) + reshape_140 = paddle._C_ops.reshape(transpose_60, full_int_array_27) + del transpose_60 + + # pd_op.reshape: (4x144xf32) <- (4x12x12x1xf32, 2xi64) + reshape_141 = paddle._C_ops.reshape(reshape_140, full_int_array_28) + del reshape_140 + + # pd_op.unsqueeze: (4x1x144xf32) <- (4x144xf32, 1xi64) + unsqueeze_25 = paddle._C_ops.unsqueeze(reshape_141, full_int_array_1) + + # pd_op.unsqueeze: (4x144x1xf32) <- (4x144xf32, 1xi64) + unsqueeze_26 = paddle._C_ops.unsqueeze(reshape_141, full_int_array_5) + del reshape_141 + + # pd_op.subtract: (4x144x144xf32) <- (4x1x144xf32, 4x144x1xf32) + subtract_4 = paddle._C_ops.subtract(unsqueeze_25, unsqueeze_26) + del unsqueeze_25, unsqueeze_26 + + # pd_op.not_equal: (4x144x144xb) <- (4x144x144xf32, xf32) + not_equal_4 = paddle._C_ops.not_equal(subtract_4, full_11) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_8 = paddle._C_ops.where(not_equal_4, full_33, subtract_4) + del not_equal_4, subtract_4 + + # pd_op.equal: (4x144x144xb) <- (4x144x144xf32, xf32) + equal_4 = paddle._C_ops.equal(where_8, full_11) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_9 = paddle._C_ops.where(equal_4, full_34, where_8) + del equal_4, where_8 + + # pd_op.shape64: (3xi64) <- (-1x144x768xf32) + shape64_42 = paddle._C_ops.shape64(reshape_138) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_69 = paddle._C_ops.slice( + shape64_42, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_42 + + # pd_op.matmul: (-1x144x2304xf32) <- (-1x144x768xf32, 768x2304xf32) + matmul_56 = paddle._C_ops.matmul(reshape_138, parameter_184, False, False) + del parameter_184, reshape_138 + + # pd_op.add: (-1x144x2304xf32) <- (-1x144x2304xf32, 2304xf32) + add_69 = paddle._C_ops.add(matmul_56, parameter_183) + del matmul_56, parameter_183 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_63 = [slice_69, full_4, full_5, full_29, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_61 = paddle._C_ops.stack(combine_63, 0) + del combine_63 + + # pd_op.reshape: (-1x144x3x24x32xf32) <- (-1x144x2304xf32, 5xi64) + reshape_142 = paddle._C_ops.reshape(add_69, stack_61) + del add_69, stack_61 + + # pd_op.transpose: (3x-1x24x144x32xf32) <- (-1x144x3x24x32xf32) + transpose_61 = paddle._C_ops.transpose(reshape_142, [2, 0, 3, 1, 4]) + del reshape_142 + + # pd_op.slice: (-1x24x144x32xf32) <- (3x-1x24x144x32xf32, 1xi64, 1xi64) + slice_70 = paddle._C_ops.slice( + transpose_61, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x24x144x32xf32) <- (3x-1x24x144x32xf32, 1xi64, 1xi64) + slice_71 = paddle._C_ops.slice( + transpose_61, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x24x144x32xf32) <- (3x-1x24x144x32xf32, 1xi64, 1xi64) + slice_72 = paddle._C_ops.slice( + transpose_61, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_61 + + # pd_op.scale: (-1x24x144x32xf32) <- (-1x24x144x32xf32, 1xf32) + scale_9 = paddle._C_ops.scale(slice_70, full_8, float("0"), True) + del slice_70 + + # pd_op.transpose: (-1x24x32x144xf32) <- (-1x24x144x32xf32) + transpose_62 = paddle._C_ops.transpose(slice_71, [0, 1, 3, 2]) + del slice_71 + + # pd_op.matmul: (-1x24x144x144xf32) <- (-1x24x144x32xf32, -1x24x32x144xf32) + matmul_57 = paddle._C_ops.matmul(scale_9, transpose_62, False, False) + del scale_9, transpose_62 + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_143 = paddle._C_ops.reshape(data_19, full_int_array_7) + del data_19 + + # pd_op.index_select: (20736x24xf32) <- (529x24xf32, 20736xi64) + index_select_9 = paddle._C_ops.index_select(data_20, reshape_143, 0) + del data_20, reshape_143 + + # pd_op.reshape: (144x144x24xf32) <- (20736x24xf32, 3xi64) + reshape_144 = paddle._C_ops.reshape(index_select_9, full_int_array_8) + del index_select_9 + + # pd_op.transpose: (24x144x144xf32) <- (144x144x24xf32) + transpose_63 = paddle._C_ops.transpose(reshape_144, [2, 0, 1]) + del reshape_144 + + # pd_op.unsqueeze: (1x24x144x144xf32) <- (24x144x144xf32, 1xi64) + unsqueeze_27 = paddle._C_ops.unsqueeze(transpose_63, full_int_array_0) + del transpose_63 + + # pd_op.add: (-1x24x144x144xf32) <- (-1x24x144x144xf32, 1x24x144x144xf32) + add_70 = paddle._C_ops.add(matmul_57, unsqueeze_27) + del matmul_57, unsqueeze_27 + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_4 = paddle._C_ops.floor_divide(slice_69, full_35) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_64 = [floor_divide_4, full_21, full_29, full_4, full_4] + del floor_divide_4 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_62 = paddle._C_ops.stack(combine_64, 0) + del combine_64 + + # pd_op.reshape: (-1x4x24x144x144xf32) <- (-1x24x144x144xf32, 5xi64) + reshape_145 = paddle._C_ops.reshape(add_70, stack_62) + del add_70, stack_62 + + # pd_op.unsqueeze: (4x1x144x144xf32) <- (4x144x144xf32, 1xi64) + unsqueeze_28 = paddle._C_ops.unsqueeze(where_9, full_int_array_1) + del where_9 + + # pd_op.unsqueeze: (1x4x1x144x144xf32) <- (4x1x144x144xf32, 1xi64) + unsqueeze_29 = paddle._C_ops.unsqueeze(unsqueeze_28, full_int_array_0) + del unsqueeze_28 + + # pd_op.add: (-1x4x24x144x144xf32) <- (-1x4x24x144x144xf32, 1x4x1x144x144xf32) + add_71 = paddle._C_ops.add(reshape_145, unsqueeze_29) + del reshape_145, unsqueeze_29 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_65 = [slice_69, full_29, full_4, full_4] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_63 = paddle._C_ops.stack(combine_65, 0) + del combine_65 + + # pd_op.reshape: (-1x24x144x144xf32) <- (-1x4x24x144x144xf32, 4xi64) + reshape_146 = paddle._C_ops.reshape(add_71, stack_63) + del add_71, stack_63 + + # pd_op.softmax: (-1x24x144x144xf32) <- (-1x24x144x144xf32) + softmax_9 = paddle._C_ops.softmax(reshape_146, -1) + del reshape_146 + + # pd_op.matmul: (-1x24x144x32xf32) <- (-1x24x144x144xf32, -1x24x144x32xf32) + matmul_58 = paddle._C_ops.matmul(softmax_9, slice_72, False, False) + del slice_72, softmax_9 + + # pd_op.transpose: (-1x144x24x32xf32) <- (-1x24x144x32xf32) + transpose_64 = paddle._C_ops.transpose(matmul_58, [0, 2, 1, 3]) + del matmul_58 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_66 = [slice_69, full_4, full_18] + del slice_69 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_64 = paddle._C_ops.stack(combine_66, 0) + del combine_66 + + # pd_op.reshape: (-1x144x768xf32) <- (-1x144x24x32xf32, 3xi64) + reshape_147 = paddle._C_ops.reshape(transpose_64, stack_64) + del stack_64, transpose_64 + + # pd_op.matmul: (-1x144x768xf32) <- (-1x144x768xf32, 768x768xf32) + matmul_59 = paddle._C_ops.matmul(reshape_147, parameter_182, False, False) + del parameter_182, reshape_147 + + # pd_op.add: (-1x144x768xf32) <- (-1x144x768xf32, 768xf32) + add_72 = paddle._C_ops.add(matmul_59, parameter_181) + del matmul_59, parameter_181 + + # pd_op.reshape: (-1x12x12x768xf32) <- (-1x144x768xf32, 4xi64) + reshape_148 = paddle._C_ops.reshape(add_72, full_int_array_38) + del add_72 + + # pd_op.reshape: (-1x2x2x12x12x768xf32) <- (-1x12x12x768xf32, 6xi64) + reshape_149 = paddle._C_ops.reshape(reshape_148, full_int_array_40) + del reshape_148 + + # pd_op.transpose: (-1x2x12x2x12x768xf32) <- (-1x2x2x12x12x768xf32) + transpose_65 = paddle._C_ops.transpose(reshape_149, [0, 1, 3, 2, 4, 5]) + del reshape_149 + + # pd_op.reshape: (-1x24x24x768xf32) <- (-1x2x12x2x12x768xf32, 4xi64) + reshape_150 = paddle._C_ops.reshape(transpose_65, full_int_array_41) + del transpose_65 + + # pd_op.roll: (-1x24x24x768xf32) <- (-1x24x24x768xf32, 2xi64) + roll_9 = paddle._C_ops.roll(reshape_150, full_int_array_29, [1, 2]) + del reshape_150 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_67 = [slice_66, full_31, full_18] + del slice_66 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_65 = paddle._C_ops.stack(combine_67, 0) + del combine_67 + + # pd_op.reshape: (-1x576x768xf32) <- (-1x24x24x768xf32, 3xi64) + reshape_151 = paddle._C_ops.reshape(roll_9, stack_65) + del roll_9, stack_65 + + # pd_op.add: (-1x576x768xf32) <- (-1x576x768xf32, -1x576x768xf32) + add_73 = paddle._C_ops.add(add_68, reshape_151) + del add_68, reshape_151 + + # pd_op.layer_norm: (-1x576x768xf32, -1x576xf32, -1x576xf32) <- (-1x576x768xf32, 768xf32, 768xf32) + layer_norm_66, layer_norm_67, layer_norm_68 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_73, parameter_180, parameter_179, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_179, parameter_180 + + # pd_op.matmul: (-1x576x3072xf32) <- (-1x576x768xf32, 768x3072xf32) + matmul_60 = paddle._C_ops.matmul(layer_norm_66, parameter_178, False, False) + del layer_norm_66, parameter_178 + + # pd_op.add: (-1x576x3072xf32) <- (-1x576x3072xf32, 3072xf32) + add_74 = paddle._C_ops.add(matmul_60, parameter_177) + del matmul_60, parameter_177 + + # pd_op.gelu: (-1x576x3072xf32) <- (-1x576x3072xf32) + gelu_9 = paddle._C_ops.gelu(add_74, False) + del add_74 + + # pd_op.matmul: (-1x576x768xf32) <- (-1x576x3072xf32, 3072x768xf32) + matmul_61 = paddle._C_ops.matmul(gelu_9, parameter_176, False, False) + del gelu_9, parameter_176 + + # pd_op.add: (-1x576x768xf32) <- (-1x576x768xf32, 768xf32) + add_75 = paddle._C_ops.add(matmul_61, parameter_175) + del matmul_61, parameter_175 + + # pd_op.add: (-1x576x768xf32) <- (-1x576x768xf32, -1x576x768xf32) + add_76 = paddle._C_ops.add(add_73, add_75) + del add_73, add_75 + + # pd_op.shape64: (3xi64) <- (-1x576x768xf32) + shape64_43 = paddle._C_ops.shape64(add_76) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_73 = paddle._C_ops.slice( + shape64_43, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_43 + + # pd_op.layer_norm: (-1x576x768xf32, -1x576xf32, -1x576xf32) <- (-1x576x768xf32, 768xf32, 768xf32) + layer_norm_69, layer_norm_70, layer_norm_71 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_76, parameter_174, parameter_173, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_173, parameter_174 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_68 = [slice_73, full_29, full_29, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_66 = paddle._C_ops.stack(combine_68, 0) + del combine_68 + + # pd_op.reshape: (-1x24x24x768xf32) <- (-1x576x768xf32, 4xi64) + reshape_152 = paddle._C_ops.reshape(layer_norm_69, stack_66) + del layer_norm_69, stack_66 + + # pd_op.shape64: (4xi64) <- (-1x24x24x768xf32) + shape64_44 = paddle._C_ops.shape64(reshape_152) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_74 = paddle._C_ops.slice( + shape64_44, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_44 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_69 = [slice_74, full_30, full_3, full_30, full_3, full_18] + del slice_74 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_67 = paddle._C_ops.stack(combine_69, 0) + del combine_69 + + # pd_op.reshape: (-1x2x12x2x12x768xf32) <- (-1x24x24x768xf32, 6xi64) + reshape_153 = paddle._C_ops.reshape(reshape_152, stack_67) + del reshape_152, stack_67 + + # pd_op.transpose: (-1x2x2x12x12x768xf32) <- (-1x2x12x2x12x768xf32) + transpose_66 = paddle._C_ops.transpose(reshape_153, [0, 1, 3, 2, 4, 5]) + del reshape_153 + + # pd_op.reshape: (-1x12x12x768xf32) <- (-1x2x2x12x12x768xf32, 4xi64) + reshape_154 = paddle._C_ops.reshape(transpose_66, full_int_array_38) + del transpose_66 + + # pd_op.reshape: (-1x144x768xf32) <- (-1x12x12x768xf32, 3xi64) + reshape_155 = paddle._C_ops.reshape(reshape_154, full_int_array_39) + del reshape_154 + + # pd_op.shape64: (3xi64) <- (-1x144x768xf32) + shape64_45 = paddle._C_ops.shape64(reshape_155) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_75 = paddle._C_ops.slice( + shape64_45, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_45 + + # pd_op.matmul: (-1x144x2304xf32) <- (-1x144x768xf32, 768x2304xf32) + matmul_62 = paddle._C_ops.matmul(reshape_155, parameter_172, False, False) + del parameter_172, reshape_155 + + # pd_op.add: (-1x144x2304xf32) <- (-1x144x2304xf32, 2304xf32) + add_77 = paddle._C_ops.add(matmul_62, parameter_171) + del matmul_62, parameter_171 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_70 = [slice_75, full_4, full_5, full_29, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_68 = paddle._C_ops.stack(combine_70, 0) + del combine_70 + + # pd_op.reshape: (-1x144x3x24x32xf32) <- (-1x144x2304xf32, 5xi64) + reshape_156 = paddle._C_ops.reshape(add_77, stack_68) + del add_77, stack_68 + + # pd_op.transpose: (3x-1x24x144x32xf32) <- (-1x144x3x24x32xf32) + transpose_67 = paddle._C_ops.transpose(reshape_156, [2, 0, 3, 1, 4]) + del reshape_156 + + # pd_op.slice: (-1x24x144x32xf32) <- (3x-1x24x144x32xf32, 1xi64, 1xi64) + slice_76 = paddle._C_ops.slice( + transpose_67, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x24x144x32xf32) <- (3x-1x24x144x32xf32, 1xi64, 1xi64) + slice_77 = paddle._C_ops.slice( + transpose_67, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x24x144x32xf32) <- (3x-1x24x144x32xf32, 1xi64, 1xi64) + slice_78 = paddle._C_ops.slice( + transpose_67, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_67 + + # pd_op.scale: (-1x24x144x32xf32) <- (-1x24x144x32xf32, 1xf32) + scale_10 = paddle._C_ops.scale(slice_76, full_8, float("0"), True) + del slice_76 + + # pd_op.transpose: (-1x24x32x144xf32) <- (-1x24x144x32xf32) + transpose_68 = paddle._C_ops.transpose(slice_77, [0, 1, 3, 2]) + del slice_77 + + # pd_op.matmul: (-1x24x144x144xf32) <- (-1x24x144x32xf32, -1x24x32x144xf32) + matmul_63 = paddle._C_ops.matmul(scale_10, transpose_68, False, False) + del scale_10, transpose_68 + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_157 = paddle._C_ops.reshape(data_21, full_int_array_7) + del data_21 + + # pd_op.index_select: (20736x24xf32) <- (529x24xf32, 20736xi64) + index_select_10 = paddle._C_ops.index_select(data_22, reshape_157, 0) + del data_22, reshape_157 + + # pd_op.reshape: (144x144x24xf32) <- (20736x24xf32, 3xi64) + reshape_158 = paddle._C_ops.reshape(index_select_10, full_int_array_8) + del index_select_10 + + # pd_op.transpose: (24x144x144xf32) <- (144x144x24xf32) + transpose_69 = paddle._C_ops.transpose(reshape_158, [2, 0, 1]) + del reshape_158 + + # pd_op.unsqueeze: (1x24x144x144xf32) <- (24x144x144xf32, 1xi64) + unsqueeze_30 = paddle._C_ops.unsqueeze(transpose_69, full_int_array_0) + del transpose_69 + + # pd_op.add: (-1x24x144x144xf32) <- (-1x24x144x144xf32, 1x24x144x144xf32) + add_78 = paddle._C_ops.add(matmul_63, unsqueeze_30) + del matmul_63, unsqueeze_30 + + # pd_op.softmax: (-1x24x144x144xf32) <- (-1x24x144x144xf32) + softmax_10 = paddle._C_ops.softmax(add_78, -1) + del add_78 + + # pd_op.matmul: (-1x24x144x32xf32) <- (-1x24x144x144xf32, -1x24x144x32xf32) + matmul_64 = paddle._C_ops.matmul(softmax_10, slice_78, False, False) + del slice_78, softmax_10 + + # pd_op.transpose: (-1x144x24x32xf32) <- (-1x24x144x32xf32) + transpose_70 = paddle._C_ops.transpose(matmul_64, [0, 2, 1, 3]) + del matmul_64 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_71 = [slice_75, full_4, full_18] + del slice_75 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_69 = paddle._C_ops.stack(combine_71, 0) + del combine_71 + + # pd_op.reshape: (-1x144x768xf32) <- (-1x144x24x32xf32, 3xi64) + reshape_159 = paddle._C_ops.reshape(transpose_70, stack_69) + del stack_69, transpose_70 + + # pd_op.matmul: (-1x144x768xf32) <- (-1x144x768xf32, 768x768xf32) + matmul_65 = paddle._C_ops.matmul(reshape_159, parameter_170, False, False) + del parameter_170, reshape_159 + + # pd_op.add: (-1x144x768xf32) <- (-1x144x768xf32, 768xf32) + add_79 = paddle._C_ops.add(matmul_65, parameter_169) + del matmul_65, parameter_169 + + # pd_op.reshape: (-1x12x12x768xf32) <- (-1x144x768xf32, 4xi64) + reshape_160 = paddle._C_ops.reshape(add_79, full_int_array_38) + del add_79 + + # pd_op.reshape: (-1x2x2x12x12x768xf32) <- (-1x12x12x768xf32, 6xi64) + reshape_161 = paddle._C_ops.reshape(reshape_160, full_int_array_40) + del reshape_160 + + # pd_op.transpose: (-1x2x12x2x12x768xf32) <- (-1x2x2x12x12x768xf32) + transpose_71 = paddle._C_ops.transpose(reshape_161, [0, 1, 3, 2, 4, 5]) + del reshape_161 + + # pd_op.reshape: (-1x24x24x768xf32) <- (-1x2x12x2x12x768xf32, 4xi64) + reshape_162 = paddle._C_ops.reshape(transpose_71, full_int_array_41) + del transpose_71 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_72 = [slice_73, full_31, full_18] + del slice_73 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_70 = paddle._C_ops.stack(combine_72, 0) + del combine_72 + + # pd_op.reshape: (-1x576x768xf32) <- (-1x24x24x768xf32, 3xi64) + reshape_163 = paddle._C_ops.reshape(reshape_162, stack_70) + del reshape_162, stack_70 + + # pd_op.add: (-1x576x768xf32) <- (-1x576x768xf32, -1x576x768xf32) + add_80 = paddle._C_ops.add(add_76, reshape_163) + del add_76, reshape_163 + + # pd_op.layer_norm: (-1x576x768xf32, -1x576xf32, -1x576xf32) <- (-1x576x768xf32, 768xf32, 768xf32) + layer_norm_72, layer_norm_73, layer_norm_74 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_80, parameter_168, parameter_167, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_167, parameter_168 + + # pd_op.matmul: (-1x576x3072xf32) <- (-1x576x768xf32, 768x3072xf32) + matmul_66 = paddle._C_ops.matmul(layer_norm_72, parameter_166, False, False) + del layer_norm_72, parameter_166 + + # pd_op.add: (-1x576x3072xf32) <- (-1x576x3072xf32, 3072xf32) + add_81 = paddle._C_ops.add(matmul_66, parameter_165) + del matmul_66, parameter_165 + + # pd_op.gelu: (-1x576x3072xf32) <- (-1x576x3072xf32) + gelu_10 = paddle._C_ops.gelu(add_81, False) + del add_81 + + # pd_op.matmul: (-1x576x768xf32) <- (-1x576x3072xf32, 3072x768xf32) + matmul_67 = paddle._C_ops.matmul(gelu_10, parameter_164, False, False) + del gelu_10, parameter_164 + + # pd_op.add: (-1x576x768xf32) <- (-1x576x768xf32, 768xf32) + add_82 = paddle._C_ops.add(matmul_67, parameter_163) + del matmul_67, parameter_163 + + # pd_op.add: (-1x576x768xf32) <- (-1x576x768xf32, -1x576x768xf32) + add_83 = paddle._C_ops.add(add_80, add_82) + del add_80, add_82 + + # pd_op.shape64: (3xi64) <- (-1x576x768xf32) + shape64_46 = paddle._C_ops.shape64(add_83) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_79 = paddle._C_ops.slice( + shape64_46, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_46 + + # pd_op.layer_norm: (-1x576x768xf32, -1x576xf32, -1x576xf32) <- (-1x576x768xf32, 768xf32, 768xf32) + layer_norm_75, layer_norm_76, layer_norm_77 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_83, parameter_162, parameter_161, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_161, parameter_162 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_73 = [slice_79, full_29, full_29, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_71 = paddle._C_ops.stack(combine_73, 0) + del combine_73 + + # pd_op.reshape: (-1x24x24x768xf32) <- (-1x576x768xf32, 4xi64) + reshape_164 = paddle._C_ops.reshape(layer_norm_75, stack_71) + del layer_norm_75, stack_71 + + # pd_op.shape64: (4xi64) <- (-1x24x24x768xf32) + shape64_47 = paddle._C_ops.shape64(reshape_164) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_80 = paddle._C_ops.slice( + shape64_47, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_47 + + # pd_op.roll: (-1x24x24x768xf32) <- (-1x24x24x768xf32, 2xi64) + roll_10 = paddle._C_ops.roll(reshape_164, full_int_array_11, [1, 2]) + del reshape_164 + + # pd_op.shape64: (4xi64) <- (-1x24x24x768xf32) + shape64_48 = paddle._C_ops.shape64(roll_10) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_81 = paddle._C_ops.slice( + shape64_48, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_48 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_74 = [slice_81, full_30, full_3, full_30, full_3, full_18] + del slice_81 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_72 = paddle._C_ops.stack(combine_74, 0) + del combine_74 + + # pd_op.reshape: (-1x2x12x2x12x768xf32) <- (-1x24x24x768xf32, 6xi64) + reshape_165 = paddle._C_ops.reshape(roll_10, stack_72) + del roll_10, stack_72 + + # pd_op.transpose: (-1x2x2x12x12x768xf32) <- (-1x2x12x2x12x768xf32) + transpose_72 = paddle._C_ops.transpose(reshape_165, [0, 1, 3, 2, 4, 5]) + del reshape_165 + + # pd_op.reshape: (-1x12x12x768xf32) <- (-1x2x2x12x12x768xf32, 4xi64) + reshape_166 = paddle._C_ops.reshape(transpose_72, full_int_array_38) + del transpose_72 + + # pd_op.reshape: (-1x144x768xf32) <- (-1x12x12x768xf32, 3xi64) + reshape_167 = paddle._C_ops.reshape(reshape_166, full_int_array_39) + del reshape_166 + + # pd_op.full: (1x24x24x1xf32) <- () + full_38 = paddle._C_ops.full( + [1, 24, 24, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__52 = paddle._C_ops.set_value_( + full_38, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_38 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__53 = paddle._C_ops.set_value_( + set_value__52, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__52 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__54 = paddle._C_ops.set_value_( + set_value__53, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__53 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__55 = paddle._C_ops.set_value_( + set_value__54, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__54 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__56 = paddle._C_ops.set_value_( + set_value__55, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__55 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__57 = paddle._C_ops.set_value_( + set_value__56, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__56 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__58 = paddle._C_ops.set_value_( + set_value__57, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__57 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__59 = paddle._C_ops.set_value_( + set_value__58, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__58 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__5 = paddle._C_ops.set_value_( + set_value__59, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__59 + + # pd_op.reshape: (1x2x12x2x12x1xf32) <- (1x24x24x1xf32, 6xi64) + reshape_168 = paddle._C_ops.reshape(set_value__5, full_int_array_42) + + # pd_op.transpose: (1x2x2x12x12x1xf32) <- (1x2x12x2x12x1xf32) + transpose_73 = paddle._C_ops.transpose(reshape_168, [0, 1, 3, 2, 4, 5]) + del reshape_168 + + # pd_op.reshape: (4x12x12x1xf32) <- (1x2x2x12x12x1xf32, 4xi64) + reshape_169 = paddle._C_ops.reshape(transpose_73, full_int_array_27) + del transpose_73 + + # pd_op.reshape: (4x144xf32) <- (4x12x12x1xf32, 2xi64) + reshape_170 = paddle._C_ops.reshape(reshape_169, full_int_array_28) + del reshape_169 + + # pd_op.unsqueeze: (4x1x144xf32) <- (4x144xf32, 1xi64) + unsqueeze_31 = paddle._C_ops.unsqueeze(reshape_170, full_int_array_1) + + # pd_op.unsqueeze: (4x144x1xf32) <- (4x144xf32, 1xi64) + unsqueeze_32 = paddle._C_ops.unsqueeze(reshape_170, full_int_array_5) + del reshape_170 + + # pd_op.subtract: (4x144x144xf32) <- (4x1x144xf32, 4x144x1xf32) + subtract_5 = paddle._C_ops.subtract(unsqueeze_31, unsqueeze_32) + del unsqueeze_31, unsqueeze_32 + + # pd_op.not_equal: (4x144x144xb) <- (4x144x144xf32, xf32) + not_equal_5 = paddle._C_ops.not_equal(subtract_5, full_11) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_10 = paddle._C_ops.where(not_equal_5, full_33, subtract_5) + del not_equal_5, subtract_5 + + # pd_op.equal: (4x144x144xb) <- (4x144x144xf32, xf32) + equal_5 = paddle._C_ops.equal(where_10, full_11) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_11 = paddle._C_ops.where(equal_5, full_34, where_10) + del equal_5, where_10 + + # pd_op.shape64: (3xi64) <- (-1x144x768xf32) + shape64_49 = paddle._C_ops.shape64(reshape_167) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_82 = paddle._C_ops.slice( + shape64_49, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_49 + + # pd_op.matmul: (-1x144x2304xf32) <- (-1x144x768xf32, 768x2304xf32) + matmul_68 = paddle._C_ops.matmul(reshape_167, parameter_160, False, False) + del parameter_160, reshape_167 + + # pd_op.add: (-1x144x2304xf32) <- (-1x144x2304xf32, 2304xf32) + add_84 = paddle._C_ops.add(matmul_68, parameter_159) + del matmul_68, parameter_159 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_75 = [slice_82, full_4, full_5, full_29, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_73 = paddle._C_ops.stack(combine_75, 0) + del combine_75 + + # pd_op.reshape: (-1x144x3x24x32xf32) <- (-1x144x2304xf32, 5xi64) + reshape_171 = paddle._C_ops.reshape(add_84, stack_73) + del add_84, stack_73 + + # pd_op.transpose: (3x-1x24x144x32xf32) <- (-1x144x3x24x32xf32) + transpose_74 = paddle._C_ops.transpose(reshape_171, [2, 0, 3, 1, 4]) + del reshape_171 + + # pd_op.slice: (-1x24x144x32xf32) <- (3x-1x24x144x32xf32, 1xi64, 1xi64) + slice_83 = paddle._C_ops.slice( + transpose_74, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x24x144x32xf32) <- (3x-1x24x144x32xf32, 1xi64, 1xi64) + slice_84 = paddle._C_ops.slice( + transpose_74, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x24x144x32xf32) <- (3x-1x24x144x32xf32, 1xi64, 1xi64) + slice_85 = paddle._C_ops.slice( + transpose_74, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_74 + + # pd_op.scale: (-1x24x144x32xf32) <- (-1x24x144x32xf32, 1xf32) + scale_11 = paddle._C_ops.scale(slice_83, full_8, float("0"), True) + del slice_83 + + # pd_op.transpose: (-1x24x32x144xf32) <- (-1x24x144x32xf32) + transpose_75 = paddle._C_ops.transpose(slice_84, [0, 1, 3, 2]) + del slice_84 + + # pd_op.matmul: (-1x24x144x144xf32) <- (-1x24x144x32xf32, -1x24x32x144xf32) + matmul_69 = paddle._C_ops.matmul(scale_11, transpose_75, False, False) + del scale_11, transpose_75 + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_172 = paddle._C_ops.reshape(data_23, full_int_array_7) + del data_23 + + # pd_op.index_select: (20736x24xf32) <- (529x24xf32, 20736xi64) + index_select_11 = paddle._C_ops.index_select(data_24, reshape_172, 0) + del data_24, reshape_172 + + # pd_op.reshape: (144x144x24xf32) <- (20736x24xf32, 3xi64) + reshape_173 = paddle._C_ops.reshape(index_select_11, full_int_array_8) + del index_select_11 + + # pd_op.transpose: (24x144x144xf32) <- (144x144x24xf32) + transpose_76 = paddle._C_ops.transpose(reshape_173, [2, 0, 1]) + del reshape_173 + + # pd_op.unsqueeze: (1x24x144x144xf32) <- (24x144x144xf32, 1xi64) + unsqueeze_33 = paddle._C_ops.unsqueeze(transpose_76, full_int_array_0) + del transpose_76 + + # pd_op.add: (-1x24x144x144xf32) <- (-1x24x144x144xf32, 1x24x144x144xf32) + add_85 = paddle._C_ops.add(matmul_69, unsqueeze_33) + del matmul_69, unsqueeze_33 + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_5 = paddle._C_ops.floor_divide(slice_82, full_35) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_76 = [floor_divide_5, full_21, full_29, full_4, full_4] + del floor_divide_5 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_74 = paddle._C_ops.stack(combine_76, 0) + del combine_76 + + # pd_op.reshape: (-1x4x24x144x144xf32) <- (-1x24x144x144xf32, 5xi64) + reshape_174 = paddle._C_ops.reshape(add_85, stack_74) + del add_85, stack_74 + + # pd_op.unsqueeze: (4x1x144x144xf32) <- (4x144x144xf32, 1xi64) + unsqueeze_34 = paddle._C_ops.unsqueeze(where_11, full_int_array_1) + del where_11 + + # pd_op.unsqueeze: (1x4x1x144x144xf32) <- (4x1x144x144xf32, 1xi64) + unsqueeze_35 = paddle._C_ops.unsqueeze(unsqueeze_34, full_int_array_0) + del unsqueeze_34 + + # pd_op.add: (-1x4x24x144x144xf32) <- (-1x4x24x144x144xf32, 1x4x1x144x144xf32) + add_86 = paddle._C_ops.add(reshape_174, unsqueeze_35) + del reshape_174, unsqueeze_35 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_77 = [slice_82, full_29, full_4, full_4] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_75 = paddle._C_ops.stack(combine_77, 0) + del combine_77 + + # pd_op.reshape: (-1x24x144x144xf32) <- (-1x4x24x144x144xf32, 4xi64) + reshape_175 = paddle._C_ops.reshape(add_86, stack_75) + del add_86, stack_75 + + # pd_op.softmax: (-1x24x144x144xf32) <- (-1x24x144x144xf32) + softmax_11 = paddle._C_ops.softmax(reshape_175, -1) + del reshape_175 + + # pd_op.matmul: (-1x24x144x32xf32) <- (-1x24x144x144xf32, -1x24x144x32xf32) + matmul_70 = paddle._C_ops.matmul(softmax_11, slice_85, False, False) + del slice_85, softmax_11 + + # pd_op.transpose: (-1x144x24x32xf32) <- (-1x24x144x32xf32) + transpose_77 = paddle._C_ops.transpose(matmul_70, [0, 2, 1, 3]) + del matmul_70 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_78 = [slice_82, full_4, full_18] + del slice_82 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_76 = paddle._C_ops.stack(combine_78, 0) + del combine_78 + + # pd_op.reshape: (-1x144x768xf32) <- (-1x144x24x32xf32, 3xi64) + reshape_176 = paddle._C_ops.reshape(transpose_77, stack_76) + del stack_76, transpose_77 + + # pd_op.matmul: (-1x144x768xf32) <- (-1x144x768xf32, 768x768xf32) + matmul_71 = paddle._C_ops.matmul(reshape_176, parameter_158, False, False) + del parameter_158, reshape_176 + + # pd_op.add: (-1x144x768xf32) <- (-1x144x768xf32, 768xf32) + add_87 = paddle._C_ops.add(matmul_71, parameter_157) + del matmul_71, parameter_157 + + # pd_op.reshape: (-1x12x12x768xf32) <- (-1x144x768xf32, 4xi64) + reshape_177 = paddle._C_ops.reshape(add_87, full_int_array_38) + del add_87 + + # pd_op.reshape: (-1x2x2x12x12x768xf32) <- (-1x12x12x768xf32, 6xi64) + reshape_178 = paddle._C_ops.reshape(reshape_177, full_int_array_40) + del reshape_177 + + # pd_op.transpose: (-1x2x12x2x12x768xf32) <- (-1x2x2x12x12x768xf32) + transpose_78 = paddle._C_ops.transpose(reshape_178, [0, 1, 3, 2, 4, 5]) + del reshape_178 + + # pd_op.reshape: (-1x24x24x768xf32) <- (-1x2x12x2x12x768xf32, 4xi64) + reshape_179 = paddle._C_ops.reshape(transpose_78, full_int_array_41) + del transpose_78 + + # pd_op.roll: (-1x24x24x768xf32) <- (-1x24x24x768xf32, 2xi64) + roll_11 = paddle._C_ops.roll(reshape_179, full_int_array_29, [1, 2]) + del reshape_179 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_79 = [slice_79, full_31, full_18] + del slice_79 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_77 = paddle._C_ops.stack(combine_79, 0) + del combine_79 + + # pd_op.reshape: (-1x576x768xf32) <- (-1x24x24x768xf32, 3xi64) + reshape_180 = paddle._C_ops.reshape(roll_11, stack_77) + del roll_11, stack_77 + + # pd_op.add: (-1x576x768xf32) <- (-1x576x768xf32, -1x576x768xf32) + add_88 = paddle._C_ops.add(add_83, reshape_180) + del add_83, reshape_180 + + # pd_op.layer_norm: (-1x576x768xf32, -1x576xf32, -1x576xf32) <- (-1x576x768xf32, 768xf32, 768xf32) + layer_norm_78, layer_norm_79, layer_norm_80 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_88, parameter_156, parameter_155, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_155, parameter_156 + + # pd_op.matmul: (-1x576x3072xf32) <- (-1x576x768xf32, 768x3072xf32) + matmul_72 = paddle._C_ops.matmul(layer_norm_78, parameter_154, False, False) + del layer_norm_78, parameter_154 + + # pd_op.add: (-1x576x3072xf32) <- (-1x576x3072xf32, 3072xf32) + add_89 = paddle._C_ops.add(matmul_72, parameter_153) + del matmul_72, parameter_153 + + # pd_op.gelu: (-1x576x3072xf32) <- (-1x576x3072xf32) + gelu_11 = paddle._C_ops.gelu(add_89, False) + del add_89 + + # pd_op.matmul: (-1x576x768xf32) <- (-1x576x3072xf32, 3072x768xf32) + matmul_73 = paddle._C_ops.matmul(gelu_11, parameter_152, False, False) + del gelu_11, parameter_152 + + # pd_op.add: (-1x576x768xf32) <- (-1x576x768xf32, 768xf32) + add_90 = paddle._C_ops.add(matmul_73, parameter_151) + del matmul_73, parameter_151 + + # pd_op.add: (-1x576x768xf32) <- (-1x576x768xf32, -1x576x768xf32) + add_91 = paddle._C_ops.add(add_88, add_90) + del add_88, add_90 + + # pd_op.shape64: (3xi64) <- (-1x576x768xf32) + shape64_50 = paddle._C_ops.shape64(add_91) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_86 = paddle._C_ops.slice( + shape64_50, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_50 + + # pd_op.layer_norm: (-1x576x768xf32, -1x576xf32, -1x576xf32) <- (-1x576x768xf32, 768xf32, 768xf32) + layer_norm_81, layer_norm_82, layer_norm_83 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_91, parameter_150, parameter_149, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_149, parameter_150 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_80 = [slice_86, full_29, full_29, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_78 = paddle._C_ops.stack(combine_80, 0) + del combine_80 + + # pd_op.reshape: (-1x24x24x768xf32) <- (-1x576x768xf32, 4xi64) + reshape_181 = paddle._C_ops.reshape(layer_norm_81, stack_78) + del layer_norm_81, stack_78 + + # pd_op.shape64: (4xi64) <- (-1x24x24x768xf32) + shape64_51 = paddle._C_ops.shape64(reshape_181) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_87 = paddle._C_ops.slice( + shape64_51, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_51 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_81 = [slice_87, full_30, full_3, full_30, full_3, full_18] + del slice_87 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_79 = paddle._C_ops.stack(combine_81, 0) + del combine_81 + + # pd_op.reshape: (-1x2x12x2x12x768xf32) <- (-1x24x24x768xf32, 6xi64) + reshape_182 = paddle._C_ops.reshape(reshape_181, stack_79) + del reshape_181, stack_79 + + # pd_op.transpose: (-1x2x2x12x12x768xf32) <- (-1x2x12x2x12x768xf32) + transpose_79 = paddle._C_ops.transpose(reshape_182, [0, 1, 3, 2, 4, 5]) + del reshape_182 + + # pd_op.reshape: (-1x12x12x768xf32) <- (-1x2x2x12x12x768xf32, 4xi64) + reshape_183 = paddle._C_ops.reshape(transpose_79, full_int_array_38) + del transpose_79 + + # pd_op.reshape: (-1x144x768xf32) <- (-1x12x12x768xf32, 3xi64) + reshape_184 = paddle._C_ops.reshape(reshape_183, full_int_array_39) + del reshape_183 + + # pd_op.shape64: (3xi64) <- (-1x144x768xf32) + shape64_52 = paddle._C_ops.shape64(reshape_184) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_88 = paddle._C_ops.slice( + shape64_52, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_52 + + # pd_op.matmul: (-1x144x2304xf32) <- (-1x144x768xf32, 768x2304xf32) + matmul_74 = paddle._C_ops.matmul(reshape_184, parameter_148, False, False) + del parameter_148, reshape_184 + + # pd_op.add: (-1x144x2304xf32) <- (-1x144x2304xf32, 2304xf32) + add_92 = paddle._C_ops.add(matmul_74, parameter_147) + del matmul_74, parameter_147 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_82 = [slice_88, full_4, full_5, full_29, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_80 = paddle._C_ops.stack(combine_82, 0) + del combine_82 + + # pd_op.reshape: (-1x144x3x24x32xf32) <- (-1x144x2304xf32, 5xi64) + reshape_185 = paddle._C_ops.reshape(add_92, stack_80) + del add_92, stack_80 + + # pd_op.transpose: (3x-1x24x144x32xf32) <- (-1x144x3x24x32xf32) + transpose_80 = paddle._C_ops.transpose(reshape_185, [2, 0, 3, 1, 4]) + del reshape_185 + + # pd_op.slice: (-1x24x144x32xf32) <- (3x-1x24x144x32xf32, 1xi64, 1xi64) + slice_89 = paddle._C_ops.slice( + transpose_80, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x24x144x32xf32) <- (3x-1x24x144x32xf32, 1xi64, 1xi64) + slice_90 = paddle._C_ops.slice( + transpose_80, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x24x144x32xf32) <- (3x-1x24x144x32xf32, 1xi64, 1xi64) + slice_91 = paddle._C_ops.slice( + transpose_80, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_80 + + # pd_op.scale: (-1x24x144x32xf32) <- (-1x24x144x32xf32, 1xf32) + scale_12 = paddle._C_ops.scale(slice_89, full_8, float("0"), True) + del slice_89 + + # pd_op.transpose: (-1x24x32x144xf32) <- (-1x24x144x32xf32) + transpose_81 = paddle._C_ops.transpose(slice_90, [0, 1, 3, 2]) + del slice_90 + + # pd_op.matmul: (-1x24x144x144xf32) <- (-1x24x144x32xf32, -1x24x32x144xf32) + matmul_75 = paddle._C_ops.matmul(scale_12, transpose_81, False, False) + del scale_12, transpose_81 + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_186 = paddle._C_ops.reshape(data_25, full_int_array_7) + del data_25 + + # pd_op.index_select: (20736x24xf32) <- (529x24xf32, 20736xi64) + index_select_12 = paddle._C_ops.index_select(data_26, reshape_186, 0) + del data_26, reshape_186 + + # pd_op.reshape: (144x144x24xf32) <- (20736x24xf32, 3xi64) + reshape_187 = paddle._C_ops.reshape(index_select_12, full_int_array_8) + del index_select_12 + + # pd_op.transpose: (24x144x144xf32) <- (144x144x24xf32) + transpose_82 = paddle._C_ops.transpose(reshape_187, [2, 0, 1]) + del reshape_187 + + # pd_op.unsqueeze: (1x24x144x144xf32) <- (24x144x144xf32, 1xi64) + unsqueeze_36 = paddle._C_ops.unsqueeze(transpose_82, full_int_array_0) + del transpose_82 + + # pd_op.add: (-1x24x144x144xf32) <- (-1x24x144x144xf32, 1x24x144x144xf32) + add_93 = paddle._C_ops.add(matmul_75, unsqueeze_36) + del matmul_75, unsqueeze_36 + + # pd_op.softmax: (-1x24x144x144xf32) <- (-1x24x144x144xf32) + softmax_12 = paddle._C_ops.softmax(add_93, -1) + del add_93 + + # pd_op.matmul: (-1x24x144x32xf32) <- (-1x24x144x144xf32, -1x24x144x32xf32) + matmul_76 = paddle._C_ops.matmul(softmax_12, slice_91, False, False) + del slice_91, softmax_12 + + # pd_op.transpose: (-1x144x24x32xf32) <- (-1x24x144x32xf32) + transpose_83 = paddle._C_ops.transpose(matmul_76, [0, 2, 1, 3]) + del matmul_76 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_83 = [slice_88, full_4, full_18] + del slice_88 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_81 = paddle._C_ops.stack(combine_83, 0) + del combine_83 + + # pd_op.reshape: (-1x144x768xf32) <- (-1x144x24x32xf32, 3xi64) + reshape_188 = paddle._C_ops.reshape(transpose_83, stack_81) + del stack_81, transpose_83 + + # pd_op.matmul: (-1x144x768xf32) <- (-1x144x768xf32, 768x768xf32) + matmul_77 = paddle._C_ops.matmul(reshape_188, parameter_146, False, False) + del parameter_146, reshape_188 + + # pd_op.add: (-1x144x768xf32) <- (-1x144x768xf32, 768xf32) + add_94 = paddle._C_ops.add(matmul_77, parameter_145) + del matmul_77, parameter_145 + + # pd_op.reshape: (-1x12x12x768xf32) <- (-1x144x768xf32, 4xi64) + reshape_189 = paddle._C_ops.reshape(add_94, full_int_array_38) + del add_94 + + # pd_op.reshape: (-1x2x2x12x12x768xf32) <- (-1x12x12x768xf32, 6xi64) + reshape_190 = paddle._C_ops.reshape(reshape_189, full_int_array_40) + del reshape_189 + + # pd_op.transpose: (-1x2x12x2x12x768xf32) <- (-1x2x2x12x12x768xf32) + transpose_84 = paddle._C_ops.transpose(reshape_190, [0, 1, 3, 2, 4, 5]) + del reshape_190 + + # pd_op.reshape: (-1x24x24x768xf32) <- (-1x2x12x2x12x768xf32, 4xi64) + reshape_191 = paddle._C_ops.reshape(transpose_84, full_int_array_41) + del transpose_84 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_84 = [slice_86, full_31, full_18] + del slice_86 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_82 = paddle._C_ops.stack(combine_84, 0) + del combine_84 + + # pd_op.reshape: (-1x576x768xf32) <- (-1x24x24x768xf32, 3xi64) + reshape_192 = paddle._C_ops.reshape(reshape_191, stack_82) + del reshape_191, stack_82 + + # pd_op.add: (-1x576x768xf32) <- (-1x576x768xf32, -1x576x768xf32) + add_95 = paddle._C_ops.add(add_91, reshape_192) + del add_91, reshape_192 + + # pd_op.layer_norm: (-1x576x768xf32, -1x576xf32, -1x576xf32) <- (-1x576x768xf32, 768xf32, 768xf32) + layer_norm_84, layer_norm_85, layer_norm_86 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_95, parameter_144, parameter_143, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_143, parameter_144 + + # pd_op.matmul: (-1x576x3072xf32) <- (-1x576x768xf32, 768x3072xf32) + matmul_78 = paddle._C_ops.matmul(layer_norm_84, parameter_142, False, False) + del layer_norm_84, parameter_142 + + # pd_op.add: (-1x576x3072xf32) <- (-1x576x3072xf32, 3072xf32) + add_96 = paddle._C_ops.add(matmul_78, parameter_141) + del matmul_78, parameter_141 + + # pd_op.gelu: (-1x576x3072xf32) <- (-1x576x3072xf32) + gelu_12 = paddle._C_ops.gelu(add_96, False) + del add_96 + + # pd_op.matmul: (-1x576x768xf32) <- (-1x576x3072xf32, 3072x768xf32) + matmul_79 = paddle._C_ops.matmul(gelu_12, parameter_140, False, False) + del gelu_12, parameter_140 + + # pd_op.add: (-1x576x768xf32) <- (-1x576x768xf32, 768xf32) + add_97 = paddle._C_ops.add(matmul_79, parameter_139) + del matmul_79, parameter_139 + + # pd_op.add: (-1x576x768xf32) <- (-1x576x768xf32, -1x576x768xf32) + add_98 = paddle._C_ops.add(add_95, add_97) + del add_95, add_97 + + # pd_op.shape64: (3xi64) <- (-1x576x768xf32) + shape64_53 = paddle._C_ops.shape64(add_98) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_92 = paddle._C_ops.slice( + shape64_53, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_53 + + # pd_op.layer_norm: (-1x576x768xf32, -1x576xf32, -1x576xf32) <- (-1x576x768xf32, 768xf32, 768xf32) + layer_norm_87, layer_norm_88, layer_norm_89 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_98, parameter_138, parameter_137, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_137, parameter_138 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_85 = [slice_92, full_29, full_29, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_83 = paddle._C_ops.stack(combine_85, 0) + del combine_85 + + # pd_op.reshape: (-1x24x24x768xf32) <- (-1x576x768xf32, 4xi64) + reshape_193 = paddle._C_ops.reshape(layer_norm_87, stack_83) + del layer_norm_87, stack_83 + + # pd_op.shape64: (4xi64) <- (-1x24x24x768xf32) + shape64_54 = paddle._C_ops.shape64(reshape_193) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_93 = paddle._C_ops.slice( + shape64_54, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_54 + + # pd_op.roll: (-1x24x24x768xf32) <- (-1x24x24x768xf32, 2xi64) + roll_12 = paddle._C_ops.roll(reshape_193, full_int_array_11, [1, 2]) + del reshape_193 + + # pd_op.shape64: (4xi64) <- (-1x24x24x768xf32) + shape64_55 = paddle._C_ops.shape64(roll_12) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_94 = paddle._C_ops.slice( + shape64_55, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_55 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_86 = [slice_94, full_30, full_3, full_30, full_3, full_18] + del slice_94 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_84 = paddle._C_ops.stack(combine_86, 0) + del combine_86 + + # pd_op.reshape: (-1x2x12x2x12x768xf32) <- (-1x24x24x768xf32, 6xi64) + reshape_194 = paddle._C_ops.reshape(roll_12, stack_84) + del roll_12, stack_84 + + # pd_op.transpose: (-1x2x2x12x12x768xf32) <- (-1x2x12x2x12x768xf32) + transpose_85 = paddle._C_ops.transpose(reshape_194, [0, 1, 3, 2, 4, 5]) + del reshape_194 + + # pd_op.reshape: (-1x12x12x768xf32) <- (-1x2x2x12x12x768xf32, 4xi64) + reshape_195 = paddle._C_ops.reshape(transpose_85, full_int_array_38) + del transpose_85 + + # pd_op.reshape: (-1x144x768xf32) <- (-1x12x12x768xf32, 3xi64) + reshape_196 = paddle._C_ops.reshape(reshape_195, full_int_array_39) + del reshape_195 + + # pd_op.full: (1x24x24x1xf32) <- () + full_39 = paddle._C_ops.full( + [1, 24, 24, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__60 = paddle._C_ops.set_value_( + full_39, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_39 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__61 = paddle._C_ops.set_value_( + set_value__60, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__60 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__62 = paddle._C_ops.set_value_( + set_value__61, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__61 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__63 = paddle._C_ops.set_value_( + set_value__62, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__62 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__64 = paddle._C_ops.set_value_( + set_value__63, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__63 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__65 = paddle._C_ops.set_value_( + set_value__64, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__64 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__66 = paddle._C_ops.set_value_( + set_value__65, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__65 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__67 = paddle._C_ops.set_value_( + set_value__66, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__66 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__6 = paddle._C_ops.set_value_( + set_value__67, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__67 + + # pd_op.reshape: (1x2x12x2x12x1xf32) <- (1x24x24x1xf32, 6xi64) + reshape_197 = paddle._C_ops.reshape(set_value__6, full_int_array_42) + + # pd_op.transpose: (1x2x2x12x12x1xf32) <- (1x2x12x2x12x1xf32) + transpose_86 = paddle._C_ops.transpose(reshape_197, [0, 1, 3, 2, 4, 5]) + del reshape_197 + + # pd_op.reshape: (4x12x12x1xf32) <- (1x2x2x12x12x1xf32, 4xi64) + reshape_198 = paddle._C_ops.reshape(transpose_86, full_int_array_27) + del transpose_86 + + # pd_op.reshape: (4x144xf32) <- (4x12x12x1xf32, 2xi64) + reshape_199 = paddle._C_ops.reshape(reshape_198, full_int_array_28) + del reshape_198 + + # pd_op.unsqueeze: (4x1x144xf32) <- (4x144xf32, 1xi64) + unsqueeze_37 = paddle._C_ops.unsqueeze(reshape_199, full_int_array_1) + + # pd_op.unsqueeze: (4x144x1xf32) <- (4x144xf32, 1xi64) + unsqueeze_38 = paddle._C_ops.unsqueeze(reshape_199, full_int_array_5) + del reshape_199 + + # pd_op.subtract: (4x144x144xf32) <- (4x1x144xf32, 4x144x1xf32) + subtract_6 = paddle._C_ops.subtract(unsqueeze_37, unsqueeze_38) + del unsqueeze_37, unsqueeze_38 + + # pd_op.not_equal: (4x144x144xb) <- (4x144x144xf32, xf32) + not_equal_6 = paddle._C_ops.not_equal(subtract_6, full_11) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_12 = paddle._C_ops.where(not_equal_6, full_33, subtract_6) + del not_equal_6, subtract_6 + + # pd_op.equal: (4x144x144xb) <- (4x144x144xf32, xf32) + equal_6 = paddle._C_ops.equal(where_12, full_11) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_13 = paddle._C_ops.where(equal_6, full_34, where_12) + del equal_6, where_12 + + # pd_op.shape64: (3xi64) <- (-1x144x768xf32) + shape64_56 = paddle._C_ops.shape64(reshape_196) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_95 = paddle._C_ops.slice( + shape64_56, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_56 + + # pd_op.matmul: (-1x144x2304xf32) <- (-1x144x768xf32, 768x2304xf32) + matmul_80 = paddle._C_ops.matmul(reshape_196, parameter_136, False, False) + del parameter_136, reshape_196 + + # pd_op.add: (-1x144x2304xf32) <- (-1x144x2304xf32, 2304xf32) + add_99 = paddle._C_ops.add(matmul_80, parameter_135) + del matmul_80, parameter_135 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_87 = [slice_95, full_4, full_5, full_29, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_85 = paddle._C_ops.stack(combine_87, 0) + del combine_87 + + # pd_op.reshape: (-1x144x3x24x32xf32) <- (-1x144x2304xf32, 5xi64) + reshape_200 = paddle._C_ops.reshape(add_99, stack_85) + del add_99, stack_85 + + # pd_op.transpose: (3x-1x24x144x32xf32) <- (-1x144x3x24x32xf32) + transpose_87 = paddle._C_ops.transpose(reshape_200, [2, 0, 3, 1, 4]) + del reshape_200 + + # pd_op.slice: (-1x24x144x32xf32) <- (3x-1x24x144x32xf32, 1xi64, 1xi64) + slice_96 = paddle._C_ops.slice( + transpose_87, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x24x144x32xf32) <- (3x-1x24x144x32xf32, 1xi64, 1xi64) + slice_97 = paddle._C_ops.slice( + transpose_87, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x24x144x32xf32) <- (3x-1x24x144x32xf32, 1xi64, 1xi64) + slice_98 = paddle._C_ops.slice( + transpose_87, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_87 + + # pd_op.scale: (-1x24x144x32xf32) <- (-1x24x144x32xf32, 1xf32) + scale_13 = paddle._C_ops.scale(slice_96, full_8, float("0"), True) + del slice_96 + + # pd_op.transpose: (-1x24x32x144xf32) <- (-1x24x144x32xf32) + transpose_88 = paddle._C_ops.transpose(slice_97, [0, 1, 3, 2]) + del slice_97 + + # pd_op.matmul: (-1x24x144x144xf32) <- (-1x24x144x32xf32, -1x24x32x144xf32) + matmul_81 = paddle._C_ops.matmul(scale_13, transpose_88, False, False) + del scale_13, transpose_88 + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_201 = paddle._C_ops.reshape(data_27, full_int_array_7) + del data_27 + + # pd_op.index_select: (20736x24xf32) <- (529x24xf32, 20736xi64) + index_select_13 = paddle._C_ops.index_select(data_28, reshape_201, 0) + del data_28, reshape_201 + + # pd_op.reshape: (144x144x24xf32) <- (20736x24xf32, 3xi64) + reshape_202 = paddle._C_ops.reshape(index_select_13, full_int_array_8) + del index_select_13 + + # pd_op.transpose: (24x144x144xf32) <- (144x144x24xf32) + transpose_89 = paddle._C_ops.transpose(reshape_202, [2, 0, 1]) + del reshape_202 + + # pd_op.unsqueeze: (1x24x144x144xf32) <- (24x144x144xf32, 1xi64) + unsqueeze_39 = paddle._C_ops.unsqueeze(transpose_89, full_int_array_0) + del transpose_89 + + # pd_op.add: (-1x24x144x144xf32) <- (-1x24x144x144xf32, 1x24x144x144xf32) + add_100 = paddle._C_ops.add(matmul_81, unsqueeze_39) + del matmul_81, unsqueeze_39 + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_6 = paddle._C_ops.floor_divide(slice_95, full_35) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_88 = [floor_divide_6, full_21, full_29, full_4, full_4] + del floor_divide_6 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_86 = paddle._C_ops.stack(combine_88, 0) + del combine_88 + + # pd_op.reshape: (-1x4x24x144x144xf32) <- (-1x24x144x144xf32, 5xi64) + reshape_203 = paddle._C_ops.reshape(add_100, stack_86) + del add_100, stack_86 + + # pd_op.unsqueeze: (4x1x144x144xf32) <- (4x144x144xf32, 1xi64) + unsqueeze_40 = paddle._C_ops.unsqueeze(where_13, full_int_array_1) + del where_13 + + # pd_op.unsqueeze: (1x4x1x144x144xf32) <- (4x1x144x144xf32, 1xi64) + unsqueeze_41 = paddle._C_ops.unsqueeze(unsqueeze_40, full_int_array_0) + del unsqueeze_40 + + # pd_op.add: (-1x4x24x144x144xf32) <- (-1x4x24x144x144xf32, 1x4x1x144x144xf32) + add_101 = paddle._C_ops.add(reshape_203, unsqueeze_41) + del reshape_203, unsqueeze_41 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_89 = [slice_95, full_29, full_4, full_4] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_87 = paddle._C_ops.stack(combine_89, 0) + del combine_89 + + # pd_op.reshape: (-1x24x144x144xf32) <- (-1x4x24x144x144xf32, 4xi64) + reshape_204 = paddle._C_ops.reshape(add_101, stack_87) + del add_101, stack_87 + + # pd_op.softmax: (-1x24x144x144xf32) <- (-1x24x144x144xf32) + softmax_13 = paddle._C_ops.softmax(reshape_204, -1) + del reshape_204 + + # pd_op.matmul: (-1x24x144x32xf32) <- (-1x24x144x144xf32, -1x24x144x32xf32) + matmul_82 = paddle._C_ops.matmul(softmax_13, slice_98, False, False) + del slice_98, softmax_13 + + # pd_op.transpose: (-1x144x24x32xf32) <- (-1x24x144x32xf32) + transpose_90 = paddle._C_ops.transpose(matmul_82, [0, 2, 1, 3]) + del matmul_82 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_90 = [slice_95, full_4, full_18] + del slice_95 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_88 = paddle._C_ops.stack(combine_90, 0) + del combine_90 + + # pd_op.reshape: (-1x144x768xf32) <- (-1x144x24x32xf32, 3xi64) + reshape_205 = paddle._C_ops.reshape(transpose_90, stack_88) + del stack_88, transpose_90 + + # pd_op.matmul: (-1x144x768xf32) <- (-1x144x768xf32, 768x768xf32) + matmul_83 = paddle._C_ops.matmul(reshape_205, parameter_134, False, False) + del parameter_134, reshape_205 + + # pd_op.add: (-1x144x768xf32) <- (-1x144x768xf32, 768xf32) + add_102 = paddle._C_ops.add(matmul_83, parameter_133) + del matmul_83, parameter_133 + + # pd_op.reshape: (-1x12x12x768xf32) <- (-1x144x768xf32, 4xi64) + reshape_206 = paddle._C_ops.reshape(add_102, full_int_array_38) + del add_102 + + # pd_op.reshape: (-1x2x2x12x12x768xf32) <- (-1x12x12x768xf32, 6xi64) + reshape_207 = paddle._C_ops.reshape(reshape_206, full_int_array_40) + del reshape_206 + + # pd_op.transpose: (-1x2x12x2x12x768xf32) <- (-1x2x2x12x12x768xf32) + transpose_91 = paddle._C_ops.transpose(reshape_207, [0, 1, 3, 2, 4, 5]) + del reshape_207 + + # pd_op.reshape: (-1x24x24x768xf32) <- (-1x2x12x2x12x768xf32, 4xi64) + reshape_208 = paddle._C_ops.reshape(transpose_91, full_int_array_41) + del transpose_91 + + # pd_op.roll: (-1x24x24x768xf32) <- (-1x24x24x768xf32, 2xi64) + roll_13 = paddle._C_ops.roll(reshape_208, full_int_array_29, [1, 2]) + del reshape_208 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_91 = [slice_92, full_31, full_18] + del slice_92 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_89 = paddle._C_ops.stack(combine_91, 0) + del combine_91 + + # pd_op.reshape: (-1x576x768xf32) <- (-1x24x24x768xf32, 3xi64) + reshape_209 = paddle._C_ops.reshape(roll_13, stack_89) + del roll_13, stack_89 + + # pd_op.add: (-1x576x768xf32) <- (-1x576x768xf32, -1x576x768xf32) + add_103 = paddle._C_ops.add(add_98, reshape_209) + del add_98, reshape_209 + + # pd_op.layer_norm: (-1x576x768xf32, -1x576xf32, -1x576xf32) <- (-1x576x768xf32, 768xf32, 768xf32) + layer_norm_90, layer_norm_91, layer_norm_92 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_103, parameter_132, parameter_131, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_131, parameter_132 + + # pd_op.matmul: (-1x576x3072xf32) <- (-1x576x768xf32, 768x3072xf32) + matmul_84 = paddle._C_ops.matmul(layer_norm_90, parameter_130, False, False) + del layer_norm_90, parameter_130 + + # pd_op.add: (-1x576x3072xf32) <- (-1x576x3072xf32, 3072xf32) + add_104 = paddle._C_ops.add(matmul_84, parameter_129) + del matmul_84, parameter_129 + + # pd_op.gelu: (-1x576x3072xf32) <- (-1x576x3072xf32) + gelu_13 = paddle._C_ops.gelu(add_104, False) + del add_104 + + # pd_op.matmul: (-1x576x768xf32) <- (-1x576x3072xf32, 3072x768xf32) + matmul_85 = paddle._C_ops.matmul(gelu_13, parameter_128, False, False) + del gelu_13, parameter_128 + + # pd_op.add: (-1x576x768xf32) <- (-1x576x768xf32, 768xf32) + add_105 = paddle._C_ops.add(matmul_85, parameter_127) + del matmul_85, parameter_127 + + # pd_op.add: (-1x576x768xf32) <- (-1x576x768xf32, -1x576x768xf32) + add_106 = paddle._C_ops.add(add_103, add_105) + del add_103, add_105 + + # pd_op.shape64: (3xi64) <- (-1x576x768xf32) + shape64_57 = paddle._C_ops.shape64(add_106) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_99 = paddle._C_ops.slice( + shape64_57, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_57 + + # pd_op.layer_norm: (-1x576x768xf32, -1x576xf32, -1x576xf32) <- (-1x576x768xf32, 768xf32, 768xf32) + layer_norm_93, layer_norm_94, layer_norm_95 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_106, parameter_126, parameter_125, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_125, parameter_126 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_92 = [slice_99, full_29, full_29, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_90 = paddle._C_ops.stack(combine_92, 0) + del combine_92 + + # pd_op.reshape: (-1x24x24x768xf32) <- (-1x576x768xf32, 4xi64) + reshape_210 = paddle._C_ops.reshape(layer_norm_93, stack_90) + del layer_norm_93, stack_90 + + # pd_op.shape64: (4xi64) <- (-1x24x24x768xf32) + shape64_58 = paddle._C_ops.shape64(reshape_210) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_100 = paddle._C_ops.slice( + shape64_58, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_58 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_93 = [slice_100, full_30, full_3, full_30, full_3, full_18] + del slice_100 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_91 = paddle._C_ops.stack(combine_93, 0) + del combine_93 + + # pd_op.reshape: (-1x2x12x2x12x768xf32) <- (-1x24x24x768xf32, 6xi64) + reshape_211 = paddle._C_ops.reshape(reshape_210, stack_91) + del reshape_210, stack_91 + + # pd_op.transpose: (-1x2x2x12x12x768xf32) <- (-1x2x12x2x12x768xf32) + transpose_92 = paddle._C_ops.transpose(reshape_211, [0, 1, 3, 2, 4, 5]) + del reshape_211 + + # pd_op.reshape: (-1x12x12x768xf32) <- (-1x2x2x12x12x768xf32, 4xi64) + reshape_212 = paddle._C_ops.reshape(transpose_92, full_int_array_38) + del transpose_92 + + # pd_op.reshape: (-1x144x768xf32) <- (-1x12x12x768xf32, 3xi64) + reshape_213 = paddle._C_ops.reshape(reshape_212, full_int_array_39) + del reshape_212 + + # pd_op.shape64: (3xi64) <- (-1x144x768xf32) + shape64_59 = paddle._C_ops.shape64(reshape_213) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_101 = paddle._C_ops.slice( + shape64_59, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_59 + + # pd_op.matmul: (-1x144x2304xf32) <- (-1x144x768xf32, 768x2304xf32) + matmul_86 = paddle._C_ops.matmul(reshape_213, parameter_124, False, False) + del parameter_124, reshape_213 + + # pd_op.add: (-1x144x2304xf32) <- (-1x144x2304xf32, 2304xf32) + add_107 = paddle._C_ops.add(matmul_86, parameter_123) + del matmul_86, parameter_123 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_94 = [slice_101, full_4, full_5, full_29, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_92 = paddle._C_ops.stack(combine_94, 0) + del combine_94 + + # pd_op.reshape: (-1x144x3x24x32xf32) <- (-1x144x2304xf32, 5xi64) + reshape_214 = paddle._C_ops.reshape(add_107, stack_92) + del add_107, stack_92 + + # pd_op.transpose: (3x-1x24x144x32xf32) <- (-1x144x3x24x32xf32) + transpose_93 = paddle._C_ops.transpose(reshape_214, [2, 0, 3, 1, 4]) + del reshape_214 + + # pd_op.slice: (-1x24x144x32xf32) <- (3x-1x24x144x32xf32, 1xi64, 1xi64) + slice_102 = paddle._C_ops.slice( + transpose_93, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x24x144x32xf32) <- (3x-1x24x144x32xf32, 1xi64, 1xi64) + slice_103 = paddle._C_ops.slice( + transpose_93, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x24x144x32xf32) <- (3x-1x24x144x32xf32, 1xi64, 1xi64) + slice_104 = paddle._C_ops.slice( + transpose_93, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_93 + + # pd_op.scale: (-1x24x144x32xf32) <- (-1x24x144x32xf32, 1xf32) + scale_14 = paddle._C_ops.scale(slice_102, full_8, float("0"), True) + del slice_102 + + # pd_op.transpose: (-1x24x32x144xf32) <- (-1x24x144x32xf32) + transpose_94 = paddle._C_ops.transpose(slice_103, [0, 1, 3, 2]) + del slice_103 + + # pd_op.matmul: (-1x24x144x144xf32) <- (-1x24x144x32xf32, -1x24x32x144xf32) + matmul_87 = paddle._C_ops.matmul(scale_14, transpose_94, False, False) + del scale_14, transpose_94 + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_215 = paddle._C_ops.reshape(data_29, full_int_array_7) + del data_29 + + # pd_op.index_select: (20736x24xf32) <- (529x24xf32, 20736xi64) + index_select_14 = paddle._C_ops.index_select(data_30, reshape_215, 0) + del data_30, reshape_215 + + # pd_op.reshape: (144x144x24xf32) <- (20736x24xf32, 3xi64) + reshape_216 = paddle._C_ops.reshape(index_select_14, full_int_array_8) + del index_select_14 + + # pd_op.transpose: (24x144x144xf32) <- (144x144x24xf32) + transpose_95 = paddle._C_ops.transpose(reshape_216, [2, 0, 1]) + del reshape_216 + + # pd_op.unsqueeze: (1x24x144x144xf32) <- (24x144x144xf32, 1xi64) + unsqueeze_42 = paddle._C_ops.unsqueeze(transpose_95, full_int_array_0) + del transpose_95 + + # pd_op.add: (-1x24x144x144xf32) <- (-1x24x144x144xf32, 1x24x144x144xf32) + add_108 = paddle._C_ops.add(matmul_87, unsqueeze_42) + del matmul_87, unsqueeze_42 + + # pd_op.softmax: (-1x24x144x144xf32) <- (-1x24x144x144xf32) + softmax_14 = paddle._C_ops.softmax(add_108, -1) + del add_108 + + # pd_op.matmul: (-1x24x144x32xf32) <- (-1x24x144x144xf32, -1x24x144x32xf32) + matmul_88 = paddle._C_ops.matmul(softmax_14, slice_104, False, False) + del slice_104, softmax_14 + + # pd_op.transpose: (-1x144x24x32xf32) <- (-1x24x144x32xf32) + transpose_96 = paddle._C_ops.transpose(matmul_88, [0, 2, 1, 3]) + del matmul_88 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_95 = [slice_101, full_4, full_18] + del slice_101 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_93 = paddle._C_ops.stack(combine_95, 0) + del combine_95 + + # pd_op.reshape: (-1x144x768xf32) <- (-1x144x24x32xf32, 3xi64) + reshape_217 = paddle._C_ops.reshape(transpose_96, stack_93) + del stack_93, transpose_96 + + # pd_op.matmul: (-1x144x768xf32) <- (-1x144x768xf32, 768x768xf32) + matmul_89 = paddle._C_ops.matmul(reshape_217, parameter_122, False, False) + del parameter_122, reshape_217 + + # pd_op.add: (-1x144x768xf32) <- (-1x144x768xf32, 768xf32) + add_109 = paddle._C_ops.add(matmul_89, parameter_121) + del matmul_89, parameter_121 + + # pd_op.reshape: (-1x12x12x768xf32) <- (-1x144x768xf32, 4xi64) + reshape_218 = paddle._C_ops.reshape(add_109, full_int_array_38) + del add_109 + + # pd_op.reshape: (-1x2x2x12x12x768xf32) <- (-1x12x12x768xf32, 6xi64) + reshape_219 = paddle._C_ops.reshape(reshape_218, full_int_array_40) + del reshape_218 + + # pd_op.transpose: (-1x2x12x2x12x768xf32) <- (-1x2x2x12x12x768xf32) + transpose_97 = paddle._C_ops.transpose(reshape_219, [0, 1, 3, 2, 4, 5]) + del reshape_219 + + # pd_op.reshape: (-1x24x24x768xf32) <- (-1x2x12x2x12x768xf32, 4xi64) + reshape_220 = paddle._C_ops.reshape(transpose_97, full_int_array_41) + del transpose_97 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_96 = [slice_99, full_31, full_18] + del slice_99 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_94 = paddle._C_ops.stack(combine_96, 0) + del combine_96 + + # pd_op.reshape: (-1x576x768xf32) <- (-1x24x24x768xf32, 3xi64) + reshape_221 = paddle._C_ops.reshape(reshape_220, stack_94) + del reshape_220, stack_94 + + # pd_op.add: (-1x576x768xf32) <- (-1x576x768xf32, -1x576x768xf32) + add_110 = paddle._C_ops.add(add_106, reshape_221) + del add_106, reshape_221 + + # pd_op.layer_norm: (-1x576x768xf32, -1x576xf32, -1x576xf32) <- (-1x576x768xf32, 768xf32, 768xf32) + layer_norm_96, layer_norm_97, layer_norm_98 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_110, parameter_120, parameter_119, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_119, parameter_120 + + # pd_op.matmul: (-1x576x3072xf32) <- (-1x576x768xf32, 768x3072xf32) + matmul_90 = paddle._C_ops.matmul(layer_norm_96, parameter_118, False, False) + del layer_norm_96, parameter_118 + + # pd_op.add: (-1x576x3072xf32) <- (-1x576x3072xf32, 3072xf32) + add_111 = paddle._C_ops.add(matmul_90, parameter_117) + del matmul_90, parameter_117 + + # pd_op.gelu: (-1x576x3072xf32) <- (-1x576x3072xf32) + gelu_14 = paddle._C_ops.gelu(add_111, False) + del add_111 + + # pd_op.matmul: (-1x576x768xf32) <- (-1x576x3072xf32, 3072x768xf32) + matmul_91 = paddle._C_ops.matmul(gelu_14, parameter_116, False, False) + del gelu_14, parameter_116 + + # pd_op.add: (-1x576x768xf32) <- (-1x576x768xf32, 768xf32) + add_112 = paddle._C_ops.add(matmul_91, parameter_115) + del matmul_91, parameter_115 + + # pd_op.add: (-1x576x768xf32) <- (-1x576x768xf32, -1x576x768xf32) + add_113 = paddle._C_ops.add(add_110, add_112) + del add_110, add_112 + + # pd_op.shape64: (3xi64) <- (-1x576x768xf32) + shape64_60 = paddle._C_ops.shape64(add_113) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_105 = paddle._C_ops.slice( + shape64_60, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_60 + + # pd_op.layer_norm: (-1x576x768xf32, -1x576xf32, -1x576xf32) <- (-1x576x768xf32, 768xf32, 768xf32) + layer_norm_99, layer_norm_100, layer_norm_101 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_113, parameter_114, parameter_113, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_113, parameter_114 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_97 = [slice_105, full_29, full_29, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_95 = paddle._C_ops.stack(combine_97, 0) + del combine_97 + + # pd_op.reshape: (-1x24x24x768xf32) <- (-1x576x768xf32, 4xi64) + reshape_222 = paddle._C_ops.reshape(layer_norm_99, stack_95) + del layer_norm_99, stack_95 + + # pd_op.shape64: (4xi64) <- (-1x24x24x768xf32) + shape64_61 = paddle._C_ops.shape64(reshape_222) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_106 = paddle._C_ops.slice( + shape64_61, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_61 + + # pd_op.roll: (-1x24x24x768xf32) <- (-1x24x24x768xf32, 2xi64) + roll_14 = paddle._C_ops.roll(reshape_222, full_int_array_11, [1, 2]) + del reshape_222 + + # pd_op.shape64: (4xi64) <- (-1x24x24x768xf32) + shape64_62 = paddle._C_ops.shape64(roll_14) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_107 = paddle._C_ops.slice( + shape64_62, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_62 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_98 = [slice_107, full_30, full_3, full_30, full_3, full_18] + del slice_107 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_96 = paddle._C_ops.stack(combine_98, 0) + del combine_98 + + # pd_op.reshape: (-1x2x12x2x12x768xf32) <- (-1x24x24x768xf32, 6xi64) + reshape_223 = paddle._C_ops.reshape(roll_14, stack_96) + del roll_14, stack_96 + + # pd_op.transpose: (-1x2x2x12x12x768xf32) <- (-1x2x12x2x12x768xf32) + transpose_98 = paddle._C_ops.transpose(reshape_223, [0, 1, 3, 2, 4, 5]) + del reshape_223 + + # pd_op.reshape: (-1x12x12x768xf32) <- (-1x2x2x12x12x768xf32, 4xi64) + reshape_224 = paddle._C_ops.reshape(transpose_98, full_int_array_38) + del transpose_98 + + # pd_op.reshape: (-1x144x768xf32) <- (-1x12x12x768xf32, 3xi64) + reshape_225 = paddle._C_ops.reshape(reshape_224, full_int_array_39) + del reshape_224 + + # pd_op.full: (1x24x24x1xf32) <- () + full_40 = paddle._C_ops.full( + [1, 24, 24, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__68 = paddle._C_ops.set_value_( + full_40, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_40 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__69 = paddle._C_ops.set_value_( + set_value__68, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__68 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__70 = paddle._C_ops.set_value_( + set_value__69, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__69 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__71 = paddle._C_ops.set_value_( + set_value__70, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__70 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__72 = paddle._C_ops.set_value_( + set_value__71, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__71 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__73 = paddle._C_ops.set_value_( + set_value__72, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__72 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__74 = paddle._C_ops.set_value_( + set_value__73, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__73 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__75 = paddle._C_ops.set_value_( + set_value__74, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__74 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__7 = paddle._C_ops.set_value_( + set_value__75, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__75 + + # pd_op.reshape: (1x2x12x2x12x1xf32) <- (1x24x24x1xf32, 6xi64) + reshape_226 = paddle._C_ops.reshape(set_value__7, full_int_array_42) + + # pd_op.transpose: (1x2x2x12x12x1xf32) <- (1x2x12x2x12x1xf32) + transpose_99 = paddle._C_ops.transpose(reshape_226, [0, 1, 3, 2, 4, 5]) + del reshape_226 + + # pd_op.reshape: (4x12x12x1xf32) <- (1x2x2x12x12x1xf32, 4xi64) + reshape_227 = paddle._C_ops.reshape(transpose_99, full_int_array_27) + del transpose_99 + + # pd_op.reshape: (4x144xf32) <- (4x12x12x1xf32, 2xi64) + reshape_228 = paddle._C_ops.reshape(reshape_227, full_int_array_28) + del reshape_227 + + # pd_op.unsqueeze: (4x1x144xf32) <- (4x144xf32, 1xi64) + unsqueeze_43 = paddle._C_ops.unsqueeze(reshape_228, full_int_array_1) + + # pd_op.unsqueeze: (4x144x1xf32) <- (4x144xf32, 1xi64) + unsqueeze_44 = paddle._C_ops.unsqueeze(reshape_228, full_int_array_5) + del reshape_228 + + # pd_op.subtract: (4x144x144xf32) <- (4x1x144xf32, 4x144x1xf32) + subtract_7 = paddle._C_ops.subtract(unsqueeze_43, unsqueeze_44) + del unsqueeze_43, unsqueeze_44 + + # pd_op.not_equal: (4x144x144xb) <- (4x144x144xf32, xf32) + not_equal_7 = paddle._C_ops.not_equal(subtract_7, full_11) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_14 = paddle._C_ops.where(not_equal_7, full_33, subtract_7) + del not_equal_7, subtract_7 + + # pd_op.equal: (4x144x144xb) <- (4x144x144xf32, xf32) + equal_7 = paddle._C_ops.equal(where_14, full_11) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_15 = paddle._C_ops.where(equal_7, full_34, where_14) + del equal_7, where_14 + + # pd_op.shape64: (3xi64) <- (-1x144x768xf32) + shape64_63 = paddle._C_ops.shape64(reshape_225) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_108 = paddle._C_ops.slice( + shape64_63, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_63 + + # pd_op.matmul: (-1x144x2304xf32) <- (-1x144x768xf32, 768x2304xf32) + matmul_92 = paddle._C_ops.matmul(reshape_225, parameter_112, False, False) + del parameter_112, reshape_225 + + # pd_op.add: (-1x144x2304xf32) <- (-1x144x2304xf32, 2304xf32) + add_114 = paddle._C_ops.add(matmul_92, parameter_111) + del matmul_92, parameter_111 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_99 = [slice_108, full_4, full_5, full_29, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_97 = paddle._C_ops.stack(combine_99, 0) + del combine_99 + + # pd_op.reshape: (-1x144x3x24x32xf32) <- (-1x144x2304xf32, 5xi64) + reshape_229 = paddle._C_ops.reshape(add_114, stack_97) + del add_114, stack_97 + + # pd_op.transpose: (3x-1x24x144x32xf32) <- (-1x144x3x24x32xf32) + transpose_100 = paddle._C_ops.transpose(reshape_229, [2, 0, 3, 1, 4]) + del reshape_229 + + # pd_op.slice: (-1x24x144x32xf32) <- (3x-1x24x144x32xf32, 1xi64, 1xi64) + slice_109 = paddle._C_ops.slice( + transpose_100, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x24x144x32xf32) <- (3x-1x24x144x32xf32, 1xi64, 1xi64) + slice_110 = paddle._C_ops.slice( + transpose_100, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x24x144x32xf32) <- (3x-1x24x144x32xf32, 1xi64, 1xi64) + slice_111 = paddle._C_ops.slice( + transpose_100, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_100 + + # pd_op.scale: (-1x24x144x32xf32) <- (-1x24x144x32xf32, 1xf32) + scale_15 = paddle._C_ops.scale(slice_109, full_8, float("0"), True) + del slice_109 + + # pd_op.transpose: (-1x24x32x144xf32) <- (-1x24x144x32xf32) + transpose_101 = paddle._C_ops.transpose(slice_110, [0, 1, 3, 2]) + del slice_110 + + # pd_op.matmul: (-1x24x144x144xf32) <- (-1x24x144x32xf32, -1x24x32x144xf32) + matmul_93 = paddle._C_ops.matmul(scale_15, transpose_101, False, False) + del scale_15, transpose_101 + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_230 = paddle._C_ops.reshape(data_31, full_int_array_7) + del data_31 + + # pd_op.index_select: (20736x24xf32) <- (529x24xf32, 20736xi64) + index_select_15 = paddle._C_ops.index_select(data_32, reshape_230, 0) + del data_32, reshape_230 + + # pd_op.reshape: (144x144x24xf32) <- (20736x24xf32, 3xi64) + reshape_231 = paddle._C_ops.reshape(index_select_15, full_int_array_8) + del index_select_15 + + # pd_op.transpose: (24x144x144xf32) <- (144x144x24xf32) + transpose_102 = paddle._C_ops.transpose(reshape_231, [2, 0, 1]) + del reshape_231 + + # pd_op.unsqueeze: (1x24x144x144xf32) <- (24x144x144xf32, 1xi64) + unsqueeze_45 = paddle._C_ops.unsqueeze(transpose_102, full_int_array_0) + del transpose_102 + + # pd_op.add: (-1x24x144x144xf32) <- (-1x24x144x144xf32, 1x24x144x144xf32) + add_115 = paddle._C_ops.add(matmul_93, unsqueeze_45) + del matmul_93, unsqueeze_45 + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_7 = paddle._C_ops.floor_divide(slice_108, full_35) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_100 = [floor_divide_7, full_21, full_29, full_4, full_4] + del floor_divide_7 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_98 = paddle._C_ops.stack(combine_100, 0) + del combine_100 + + # pd_op.reshape: (-1x4x24x144x144xf32) <- (-1x24x144x144xf32, 5xi64) + reshape_232 = paddle._C_ops.reshape(add_115, stack_98) + del add_115, stack_98 + + # pd_op.unsqueeze: (4x1x144x144xf32) <- (4x144x144xf32, 1xi64) + unsqueeze_46 = paddle._C_ops.unsqueeze(where_15, full_int_array_1) + del where_15 + + # pd_op.unsqueeze: (1x4x1x144x144xf32) <- (4x1x144x144xf32, 1xi64) + unsqueeze_47 = paddle._C_ops.unsqueeze(unsqueeze_46, full_int_array_0) + del unsqueeze_46 + + # pd_op.add: (-1x4x24x144x144xf32) <- (-1x4x24x144x144xf32, 1x4x1x144x144xf32) + add_116 = paddle._C_ops.add(reshape_232, unsqueeze_47) + del reshape_232, unsqueeze_47 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_101 = [slice_108, full_29, full_4, full_4] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_99 = paddle._C_ops.stack(combine_101, 0) + del combine_101 + + # pd_op.reshape: (-1x24x144x144xf32) <- (-1x4x24x144x144xf32, 4xi64) + reshape_233 = paddle._C_ops.reshape(add_116, stack_99) + del add_116, stack_99 + + # pd_op.softmax: (-1x24x144x144xf32) <- (-1x24x144x144xf32) + softmax_15 = paddle._C_ops.softmax(reshape_233, -1) + del reshape_233 + + # pd_op.matmul: (-1x24x144x32xf32) <- (-1x24x144x144xf32, -1x24x144x32xf32) + matmul_94 = paddle._C_ops.matmul(softmax_15, slice_111, False, False) + del slice_111, softmax_15 + + # pd_op.transpose: (-1x144x24x32xf32) <- (-1x24x144x32xf32) + transpose_103 = paddle._C_ops.transpose(matmul_94, [0, 2, 1, 3]) + del matmul_94 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_102 = [slice_108, full_4, full_18] + del slice_108 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_100 = paddle._C_ops.stack(combine_102, 0) + del combine_102 + + # pd_op.reshape: (-1x144x768xf32) <- (-1x144x24x32xf32, 3xi64) + reshape_234 = paddle._C_ops.reshape(transpose_103, stack_100) + del stack_100, transpose_103 + + # pd_op.matmul: (-1x144x768xf32) <- (-1x144x768xf32, 768x768xf32) + matmul_95 = paddle._C_ops.matmul(reshape_234, parameter_110, False, False) + del parameter_110, reshape_234 + + # pd_op.add: (-1x144x768xf32) <- (-1x144x768xf32, 768xf32) + add_117 = paddle._C_ops.add(matmul_95, parameter_109) + del matmul_95, parameter_109 + + # pd_op.reshape: (-1x12x12x768xf32) <- (-1x144x768xf32, 4xi64) + reshape_235 = paddle._C_ops.reshape(add_117, full_int_array_38) + del add_117 + + # pd_op.reshape: (-1x2x2x12x12x768xf32) <- (-1x12x12x768xf32, 6xi64) + reshape_236 = paddle._C_ops.reshape(reshape_235, full_int_array_40) + del reshape_235 + + # pd_op.transpose: (-1x2x12x2x12x768xf32) <- (-1x2x2x12x12x768xf32) + transpose_104 = paddle._C_ops.transpose(reshape_236, [0, 1, 3, 2, 4, 5]) + del reshape_236 + + # pd_op.reshape: (-1x24x24x768xf32) <- (-1x2x12x2x12x768xf32, 4xi64) + reshape_237 = paddle._C_ops.reshape(transpose_104, full_int_array_41) + del transpose_104 + + # pd_op.roll: (-1x24x24x768xf32) <- (-1x24x24x768xf32, 2xi64) + roll_15 = paddle._C_ops.roll(reshape_237, full_int_array_29, [1, 2]) + del reshape_237 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_103 = [slice_105, full_31, full_18] + del slice_105 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_101 = paddle._C_ops.stack(combine_103, 0) + del combine_103 + + # pd_op.reshape: (-1x576x768xf32) <- (-1x24x24x768xf32, 3xi64) + reshape_238 = paddle._C_ops.reshape(roll_15, stack_101) + del roll_15, stack_101 + + # pd_op.add: (-1x576x768xf32) <- (-1x576x768xf32, -1x576x768xf32) + add_118 = paddle._C_ops.add(add_113, reshape_238) + del add_113, reshape_238 + + # pd_op.layer_norm: (-1x576x768xf32, -1x576xf32, -1x576xf32) <- (-1x576x768xf32, 768xf32, 768xf32) + layer_norm_102, layer_norm_103, layer_norm_104 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_118, parameter_108, parameter_107, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_107, parameter_108 + + # pd_op.matmul: (-1x576x3072xf32) <- (-1x576x768xf32, 768x3072xf32) + matmul_96 = paddle._C_ops.matmul(layer_norm_102, parameter_106, False, False) + del layer_norm_102, parameter_106 + + # pd_op.add: (-1x576x3072xf32) <- (-1x576x3072xf32, 3072xf32) + add_119 = paddle._C_ops.add(matmul_96, parameter_105) + del matmul_96, parameter_105 + + # pd_op.gelu: (-1x576x3072xf32) <- (-1x576x3072xf32) + gelu_15 = paddle._C_ops.gelu(add_119, False) + del add_119 + + # pd_op.matmul: (-1x576x768xf32) <- (-1x576x3072xf32, 3072x768xf32) + matmul_97 = paddle._C_ops.matmul(gelu_15, parameter_104, False, False) + del gelu_15, parameter_104 + + # pd_op.add: (-1x576x768xf32) <- (-1x576x768xf32, 768xf32) + add_120 = paddle._C_ops.add(matmul_97, parameter_103) + del matmul_97, parameter_103 + + # pd_op.add: (-1x576x768xf32) <- (-1x576x768xf32, -1x576x768xf32) + add_121 = paddle._C_ops.add(add_118, add_120) + del add_118, add_120 + + # pd_op.shape64: (3xi64) <- (-1x576x768xf32) + shape64_64 = paddle._C_ops.shape64(add_121) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_112 = paddle._C_ops.slice( + shape64_64, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_64 + + # pd_op.layer_norm: (-1x576x768xf32, -1x576xf32, -1x576xf32) <- (-1x576x768xf32, 768xf32, 768xf32) + layer_norm_105, layer_norm_106, layer_norm_107 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_121, parameter_102, parameter_101, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_101, parameter_102 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_104 = [slice_112, full_29, full_29, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_102 = paddle._C_ops.stack(combine_104, 0) + del combine_104 + + # pd_op.reshape: (-1x24x24x768xf32) <- (-1x576x768xf32, 4xi64) + reshape_239 = paddle._C_ops.reshape(layer_norm_105, stack_102) + del layer_norm_105, stack_102 + + # pd_op.shape64: (4xi64) <- (-1x24x24x768xf32) + shape64_65 = paddle._C_ops.shape64(reshape_239) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_113 = paddle._C_ops.slice( + shape64_65, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_65 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_105 = [slice_113, full_30, full_3, full_30, full_3, full_18] + del slice_113 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_103 = paddle._C_ops.stack(combine_105, 0) + del combine_105 + + # pd_op.reshape: (-1x2x12x2x12x768xf32) <- (-1x24x24x768xf32, 6xi64) + reshape_240 = paddle._C_ops.reshape(reshape_239, stack_103) + del reshape_239, stack_103 + + # pd_op.transpose: (-1x2x2x12x12x768xf32) <- (-1x2x12x2x12x768xf32) + transpose_105 = paddle._C_ops.transpose(reshape_240, [0, 1, 3, 2, 4, 5]) + del reshape_240 + + # pd_op.reshape: (-1x12x12x768xf32) <- (-1x2x2x12x12x768xf32, 4xi64) + reshape_241 = paddle._C_ops.reshape(transpose_105, full_int_array_38) + del transpose_105 + + # pd_op.reshape: (-1x144x768xf32) <- (-1x12x12x768xf32, 3xi64) + reshape_242 = paddle._C_ops.reshape(reshape_241, full_int_array_39) + del reshape_241 + + # pd_op.shape64: (3xi64) <- (-1x144x768xf32) + shape64_66 = paddle._C_ops.shape64(reshape_242) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_114 = paddle._C_ops.slice( + shape64_66, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_66 + + # pd_op.matmul: (-1x144x2304xf32) <- (-1x144x768xf32, 768x2304xf32) + matmul_98 = paddle._C_ops.matmul(reshape_242, parameter_100, False, False) + del parameter_100, reshape_242 + + # pd_op.add: (-1x144x2304xf32) <- (-1x144x2304xf32, 2304xf32) + add_122 = paddle._C_ops.add(matmul_98, parameter_99) + del matmul_98, parameter_99 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_106 = [slice_114, full_4, full_5, full_29, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_104 = paddle._C_ops.stack(combine_106, 0) + del combine_106 + + # pd_op.reshape: (-1x144x3x24x32xf32) <- (-1x144x2304xf32, 5xi64) + reshape_243 = paddle._C_ops.reshape(add_122, stack_104) + del add_122, stack_104 + + # pd_op.transpose: (3x-1x24x144x32xf32) <- (-1x144x3x24x32xf32) + transpose_106 = paddle._C_ops.transpose(reshape_243, [2, 0, 3, 1, 4]) + del reshape_243 + + # pd_op.slice: (-1x24x144x32xf32) <- (3x-1x24x144x32xf32, 1xi64, 1xi64) + slice_115 = paddle._C_ops.slice( + transpose_106, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x24x144x32xf32) <- (3x-1x24x144x32xf32, 1xi64, 1xi64) + slice_116 = paddle._C_ops.slice( + transpose_106, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x24x144x32xf32) <- (3x-1x24x144x32xf32, 1xi64, 1xi64) + slice_117 = paddle._C_ops.slice( + transpose_106, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_106 + + # pd_op.scale: (-1x24x144x32xf32) <- (-1x24x144x32xf32, 1xf32) + scale_16 = paddle._C_ops.scale(slice_115, full_8, float("0"), True) + del slice_115 + + # pd_op.transpose: (-1x24x32x144xf32) <- (-1x24x144x32xf32) + transpose_107 = paddle._C_ops.transpose(slice_116, [0, 1, 3, 2]) + del slice_116 + + # pd_op.matmul: (-1x24x144x144xf32) <- (-1x24x144x32xf32, -1x24x32x144xf32) + matmul_99 = paddle._C_ops.matmul(scale_16, transpose_107, False, False) + del scale_16, transpose_107 + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_244 = paddle._C_ops.reshape(data_33, full_int_array_7) + del data_33 + + # pd_op.index_select: (20736x24xf32) <- (529x24xf32, 20736xi64) + index_select_16 = paddle._C_ops.index_select(data_34, reshape_244, 0) + del data_34, reshape_244 + + # pd_op.reshape: (144x144x24xf32) <- (20736x24xf32, 3xi64) + reshape_245 = paddle._C_ops.reshape(index_select_16, full_int_array_8) + del index_select_16 + + # pd_op.transpose: (24x144x144xf32) <- (144x144x24xf32) + transpose_108 = paddle._C_ops.transpose(reshape_245, [2, 0, 1]) + del reshape_245 + + # pd_op.unsqueeze: (1x24x144x144xf32) <- (24x144x144xf32, 1xi64) + unsqueeze_48 = paddle._C_ops.unsqueeze(transpose_108, full_int_array_0) + del transpose_108 + + # pd_op.add: (-1x24x144x144xf32) <- (-1x24x144x144xf32, 1x24x144x144xf32) + add_123 = paddle._C_ops.add(matmul_99, unsqueeze_48) + del matmul_99, unsqueeze_48 + + # pd_op.softmax: (-1x24x144x144xf32) <- (-1x24x144x144xf32) + softmax_16 = paddle._C_ops.softmax(add_123, -1) + del add_123 + + # pd_op.matmul: (-1x24x144x32xf32) <- (-1x24x144x144xf32, -1x24x144x32xf32) + matmul_100 = paddle._C_ops.matmul(softmax_16, slice_117, False, False) + del slice_117, softmax_16 + + # pd_op.transpose: (-1x144x24x32xf32) <- (-1x24x144x32xf32) + transpose_109 = paddle._C_ops.transpose(matmul_100, [0, 2, 1, 3]) + del matmul_100 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_107 = [slice_114, full_4, full_18] + del slice_114 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_105 = paddle._C_ops.stack(combine_107, 0) + del combine_107 + + # pd_op.reshape: (-1x144x768xf32) <- (-1x144x24x32xf32, 3xi64) + reshape_246 = paddle._C_ops.reshape(transpose_109, stack_105) + del stack_105, transpose_109 + + # pd_op.matmul: (-1x144x768xf32) <- (-1x144x768xf32, 768x768xf32) + matmul_101 = paddle._C_ops.matmul(reshape_246, parameter_98, False, False) + del parameter_98, reshape_246 + + # pd_op.add: (-1x144x768xf32) <- (-1x144x768xf32, 768xf32) + add_124 = paddle._C_ops.add(matmul_101, parameter_97) + del matmul_101, parameter_97 + + # pd_op.reshape: (-1x12x12x768xf32) <- (-1x144x768xf32, 4xi64) + reshape_247 = paddle._C_ops.reshape(add_124, full_int_array_38) + del add_124 + + # pd_op.reshape: (-1x2x2x12x12x768xf32) <- (-1x12x12x768xf32, 6xi64) + reshape_248 = paddle._C_ops.reshape(reshape_247, full_int_array_40) + del reshape_247 + + # pd_op.transpose: (-1x2x12x2x12x768xf32) <- (-1x2x2x12x12x768xf32) + transpose_110 = paddle._C_ops.transpose(reshape_248, [0, 1, 3, 2, 4, 5]) + del reshape_248 + + # pd_op.reshape: (-1x24x24x768xf32) <- (-1x2x12x2x12x768xf32, 4xi64) + reshape_249 = paddle._C_ops.reshape(transpose_110, full_int_array_41) + del transpose_110 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_108 = [slice_112, full_31, full_18] + del slice_112 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_106 = paddle._C_ops.stack(combine_108, 0) + del combine_108 + + # pd_op.reshape: (-1x576x768xf32) <- (-1x24x24x768xf32, 3xi64) + reshape_250 = paddle._C_ops.reshape(reshape_249, stack_106) + del reshape_249, stack_106 + + # pd_op.add: (-1x576x768xf32) <- (-1x576x768xf32, -1x576x768xf32) + add_125 = paddle._C_ops.add(add_121, reshape_250) + del add_121, reshape_250 + + # pd_op.layer_norm: (-1x576x768xf32, -1x576xf32, -1x576xf32) <- (-1x576x768xf32, 768xf32, 768xf32) + layer_norm_108, layer_norm_109, layer_norm_110 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_125, parameter_96, parameter_95, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_95, parameter_96 + + # pd_op.matmul: (-1x576x3072xf32) <- (-1x576x768xf32, 768x3072xf32) + matmul_102 = paddle._C_ops.matmul(layer_norm_108, parameter_94, False, False) + del layer_norm_108, parameter_94 + + # pd_op.add: (-1x576x3072xf32) <- (-1x576x3072xf32, 3072xf32) + add_126 = paddle._C_ops.add(matmul_102, parameter_93) + del matmul_102, parameter_93 + + # pd_op.gelu: (-1x576x3072xf32) <- (-1x576x3072xf32) + gelu_16 = paddle._C_ops.gelu(add_126, False) + del add_126 + + # pd_op.matmul: (-1x576x768xf32) <- (-1x576x3072xf32, 3072x768xf32) + matmul_103 = paddle._C_ops.matmul(gelu_16, parameter_92, False, False) + del gelu_16, parameter_92 + + # pd_op.add: (-1x576x768xf32) <- (-1x576x768xf32, 768xf32) + add_127 = paddle._C_ops.add(matmul_103, parameter_91) + del matmul_103, parameter_91 + + # pd_op.add: (-1x576x768xf32) <- (-1x576x768xf32, -1x576x768xf32) + add_128 = paddle._C_ops.add(add_125, add_127) + del add_125, add_127 + + # pd_op.shape64: (3xi64) <- (-1x576x768xf32) + shape64_67 = paddle._C_ops.shape64(add_128) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_118 = paddle._C_ops.slice( + shape64_67, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_67 + + # pd_op.layer_norm: (-1x576x768xf32, -1x576xf32, -1x576xf32) <- (-1x576x768xf32, 768xf32, 768xf32) + layer_norm_111, layer_norm_112, layer_norm_113 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_128, parameter_90, parameter_89, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_89, parameter_90 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_109 = [slice_118, full_29, full_29, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_107 = paddle._C_ops.stack(combine_109, 0) + del combine_109 + + # pd_op.reshape: (-1x24x24x768xf32) <- (-1x576x768xf32, 4xi64) + reshape_251 = paddle._C_ops.reshape(layer_norm_111, stack_107) + del layer_norm_111, stack_107 + + # pd_op.shape64: (4xi64) <- (-1x24x24x768xf32) + shape64_68 = paddle._C_ops.shape64(reshape_251) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_119 = paddle._C_ops.slice( + shape64_68, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_68 + + # pd_op.roll: (-1x24x24x768xf32) <- (-1x24x24x768xf32, 2xi64) + roll_16 = paddle._C_ops.roll(reshape_251, full_int_array_11, [1, 2]) + del reshape_251 + + # pd_op.shape64: (4xi64) <- (-1x24x24x768xf32) + shape64_69 = paddle._C_ops.shape64(roll_16) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_120 = paddle._C_ops.slice( + shape64_69, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_69 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_110 = [slice_120, full_30, full_3, full_30, full_3, full_18] + del slice_120 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_108 = paddle._C_ops.stack(combine_110, 0) + del combine_110 + + # pd_op.reshape: (-1x2x12x2x12x768xf32) <- (-1x24x24x768xf32, 6xi64) + reshape_252 = paddle._C_ops.reshape(roll_16, stack_108) + del roll_16, stack_108 + + # pd_op.transpose: (-1x2x2x12x12x768xf32) <- (-1x2x12x2x12x768xf32) + transpose_111 = paddle._C_ops.transpose(reshape_252, [0, 1, 3, 2, 4, 5]) + del reshape_252 + + # pd_op.reshape: (-1x12x12x768xf32) <- (-1x2x2x12x12x768xf32, 4xi64) + reshape_253 = paddle._C_ops.reshape(transpose_111, full_int_array_38) + del transpose_111 + + # pd_op.reshape: (-1x144x768xf32) <- (-1x12x12x768xf32, 3xi64) + reshape_254 = paddle._C_ops.reshape(reshape_253, full_int_array_39) + del reshape_253 + + # pd_op.full: (1x24x24x1xf32) <- () + full_41 = paddle._C_ops.full( + [1, 24, 24, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__76 = paddle._C_ops.set_value_( + full_41, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_41 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__77 = paddle._C_ops.set_value_( + set_value__76, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__76 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__78 = paddle._C_ops.set_value_( + set_value__77, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__77 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__79 = paddle._C_ops.set_value_( + set_value__78, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__78 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__80 = paddle._C_ops.set_value_( + set_value__79, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__79 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__81 = paddle._C_ops.set_value_( + set_value__80, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__80 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__82 = paddle._C_ops.set_value_( + set_value__81, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__81 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__83 = paddle._C_ops.set_value_( + set_value__82, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__82 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__8 = paddle._C_ops.set_value_( + set_value__83, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__83 + + # pd_op.reshape: (1x2x12x2x12x1xf32) <- (1x24x24x1xf32, 6xi64) + reshape_255 = paddle._C_ops.reshape(set_value__8, full_int_array_42) + + # pd_op.transpose: (1x2x2x12x12x1xf32) <- (1x2x12x2x12x1xf32) + transpose_112 = paddle._C_ops.transpose(reshape_255, [0, 1, 3, 2, 4, 5]) + del reshape_255 + + # pd_op.reshape: (4x12x12x1xf32) <- (1x2x2x12x12x1xf32, 4xi64) + reshape_256 = paddle._C_ops.reshape(transpose_112, full_int_array_27) + del transpose_112 + + # pd_op.reshape: (4x144xf32) <- (4x12x12x1xf32, 2xi64) + reshape_257 = paddle._C_ops.reshape(reshape_256, full_int_array_28) + del reshape_256 + + # pd_op.unsqueeze: (4x1x144xf32) <- (4x144xf32, 1xi64) + unsqueeze_49 = paddle._C_ops.unsqueeze(reshape_257, full_int_array_1) + + # pd_op.unsqueeze: (4x144x1xf32) <- (4x144xf32, 1xi64) + unsqueeze_50 = paddle._C_ops.unsqueeze(reshape_257, full_int_array_5) + del reshape_257 + + # pd_op.subtract: (4x144x144xf32) <- (4x1x144xf32, 4x144x1xf32) + subtract_8 = paddle._C_ops.subtract(unsqueeze_49, unsqueeze_50) + del unsqueeze_49, unsqueeze_50 + + # pd_op.not_equal: (4x144x144xb) <- (4x144x144xf32, xf32) + not_equal_8 = paddle._C_ops.not_equal(subtract_8, full_11) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_16 = paddle._C_ops.where(not_equal_8, full_33, subtract_8) + del not_equal_8, subtract_8 + + # pd_op.equal: (4x144x144xb) <- (4x144x144xf32, xf32) + equal_8 = paddle._C_ops.equal(where_16, full_11) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_17 = paddle._C_ops.where(equal_8, full_34, where_16) + del equal_8, where_16 + + # pd_op.shape64: (3xi64) <- (-1x144x768xf32) + shape64_70 = paddle._C_ops.shape64(reshape_254) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_121 = paddle._C_ops.slice( + shape64_70, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_70 + + # pd_op.matmul: (-1x144x2304xf32) <- (-1x144x768xf32, 768x2304xf32) + matmul_104 = paddle._C_ops.matmul(reshape_254, parameter_88, False, False) + del parameter_88, reshape_254 + + # pd_op.add: (-1x144x2304xf32) <- (-1x144x2304xf32, 2304xf32) + add_129 = paddle._C_ops.add(matmul_104, parameter_87) + del matmul_104, parameter_87 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_111 = [slice_121, full_4, full_5, full_29, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_109 = paddle._C_ops.stack(combine_111, 0) + del combine_111 + + # pd_op.reshape: (-1x144x3x24x32xf32) <- (-1x144x2304xf32, 5xi64) + reshape_258 = paddle._C_ops.reshape(add_129, stack_109) + del add_129, stack_109 + + # pd_op.transpose: (3x-1x24x144x32xf32) <- (-1x144x3x24x32xf32) + transpose_113 = paddle._C_ops.transpose(reshape_258, [2, 0, 3, 1, 4]) + del reshape_258 + + # pd_op.slice: (-1x24x144x32xf32) <- (3x-1x24x144x32xf32, 1xi64, 1xi64) + slice_122 = paddle._C_ops.slice( + transpose_113, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x24x144x32xf32) <- (3x-1x24x144x32xf32, 1xi64, 1xi64) + slice_123 = paddle._C_ops.slice( + transpose_113, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x24x144x32xf32) <- (3x-1x24x144x32xf32, 1xi64, 1xi64) + slice_124 = paddle._C_ops.slice( + transpose_113, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_113 + + # pd_op.scale: (-1x24x144x32xf32) <- (-1x24x144x32xf32, 1xf32) + scale_17 = paddle._C_ops.scale(slice_122, full_8, float("0"), True) + del slice_122 + + # pd_op.transpose: (-1x24x32x144xf32) <- (-1x24x144x32xf32) + transpose_114 = paddle._C_ops.transpose(slice_123, [0, 1, 3, 2]) + del slice_123 + + # pd_op.matmul: (-1x24x144x144xf32) <- (-1x24x144x32xf32, -1x24x32x144xf32) + matmul_105 = paddle._C_ops.matmul(scale_17, transpose_114, False, False) + del scale_17, transpose_114 + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_259 = paddle._C_ops.reshape(data_35, full_int_array_7) + del data_35 + + # pd_op.index_select: (20736x24xf32) <- (529x24xf32, 20736xi64) + index_select_17 = paddle._C_ops.index_select(data_36, reshape_259, 0) + del data_36, reshape_259 + + # pd_op.reshape: (144x144x24xf32) <- (20736x24xf32, 3xi64) + reshape_260 = paddle._C_ops.reshape(index_select_17, full_int_array_8) + del index_select_17 + + # pd_op.transpose: (24x144x144xf32) <- (144x144x24xf32) + transpose_115 = paddle._C_ops.transpose(reshape_260, [2, 0, 1]) + del reshape_260 + + # pd_op.unsqueeze: (1x24x144x144xf32) <- (24x144x144xf32, 1xi64) + unsqueeze_51 = paddle._C_ops.unsqueeze(transpose_115, full_int_array_0) + del transpose_115 + + # pd_op.add: (-1x24x144x144xf32) <- (-1x24x144x144xf32, 1x24x144x144xf32) + add_130 = paddle._C_ops.add(matmul_105, unsqueeze_51) + del matmul_105, unsqueeze_51 + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_8 = paddle._C_ops.floor_divide(slice_121, full_35) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_112 = [floor_divide_8, full_21, full_29, full_4, full_4] + del floor_divide_8 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_110 = paddle._C_ops.stack(combine_112, 0) + del combine_112 + + # pd_op.reshape: (-1x4x24x144x144xf32) <- (-1x24x144x144xf32, 5xi64) + reshape_261 = paddle._C_ops.reshape(add_130, stack_110) + del add_130, stack_110 + + # pd_op.unsqueeze: (4x1x144x144xf32) <- (4x144x144xf32, 1xi64) + unsqueeze_52 = paddle._C_ops.unsqueeze(where_17, full_int_array_1) + del where_17 + + # pd_op.unsqueeze: (1x4x1x144x144xf32) <- (4x1x144x144xf32, 1xi64) + unsqueeze_53 = paddle._C_ops.unsqueeze(unsqueeze_52, full_int_array_0) + del unsqueeze_52 + + # pd_op.add: (-1x4x24x144x144xf32) <- (-1x4x24x144x144xf32, 1x4x1x144x144xf32) + add_131 = paddle._C_ops.add(reshape_261, unsqueeze_53) + del reshape_261, unsqueeze_53 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_113 = [slice_121, full_29, full_4, full_4] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_111 = paddle._C_ops.stack(combine_113, 0) + del combine_113 + + # pd_op.reshape: (-1x24x144x144xf32) <- (-1x4x24x144x144xf32, 4xi64) + reshape_262 = paddle._C_ops.reshape(add_131, stack_111) + del add_131, stack_111 + + # pd_op.softmax: (-1x24x144x144xf32) <- (-1x24x144x144xf32) + softmax_17 = paddle._C_ops.softmax(reshape_262, -1) + del reshape_262 + + # pd_op.matmul: (-1x24x144x32xf32) <- (-1x24x144x144xf32, -1x24x144x32xf32) + matmul_106 = paddle._C_ops.matmul(softmax_17, slice_124, False, False) + del slice_124, softmax_17 + + # pd_op.transpose: (-1x144x24x32xf32) <- (-1x24x144x32xf32) + transpose_116 = paddle._C_ops.transpose(matmul_106, [0, 2, 1, 3]) + del matmul_106 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_114 = [slice_121, full_4, full_18] + del slice_121 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_112 = paddle._C_ops.stack(combine_114, 0) + del combine_114 + + # pd_op.reshape: (-1x144x768xf32) <- (-1x144x24x32xf32, 3xi64) + reshape_263 = paddle._C_ops.reshape(transpose_116, stack_112) + del stack_112, transpose_116 + + # pd_op.matmul: (-1x144x768xf32) <- (-1x144x768xf32, 768x768xf32) + matmul_107 = paddle._C_ops.matmul(reshape_263, parameter_86, False, False) + del parameter_86, reshape_263 + + # pd_op.add: (-1x144x768xf32) <- (-1x144x768xf32, 768xf32) + add_132 = paddle._C_ops.add(matmul_107, parameter_85) + del matmul_107, parameter_85 + + # pd_op.reshape: (-1x12x12x768xf32) <- (-1x144x768xf32, 4xi64) + reshape_264 = paddle._C_ops.reshape(add_132, full_int_array_38) + del add_132 + + # pd_op.reshape: (-1x2x2x12x12x768xf32) <- (-1x12x12x768xf32, 6xi64) + reshape_265 = paddle._C_ops.reshape(reshape_264, full_int_array_40) + del reshape_264 + + # pd_op.transpose: (-1x2x12x2x12x768xf32) <- (-1x2x2x12x12x768xf32) + transpose_117 = paddle._C_ops.transpose(reshape_265, [0, 1, 3, 2, 4, 5]) + del reshape_265 + + # pd_op.reshape: (-1x24x24x768xf32) <- (-1x2x12x2x12x768xf32, 4xi64) + reshape_266 = paddle._C_ops.reshape(transpose_117, full_int_array_41) + del transpose_117 + + # pd_op.roll: (-1x24x24x768xf32) <- (-1x24x24x768xf32, 2xi64) + roll_17 = paddle._C_ops.roll(reshape_266, full_int_array_29, [1, 2]) + del reshape_266 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_115 = [slice_118, full_31, full_18] + del slice_118 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_113 = paddle._C_ops.stack(combine_115, 0) + del combine_115 + + # pd_op.reshape: (-1x576x768xf32) <- (-1x24x24x768xf32, 3xi64) + reshape_267 = paddle._C_ops.reshape(roll_17, stack_113) + del roll_17, stack_113 + + # pd_op.add: (-1x576x768xf32) <- (-1x576x768xf32, -1x576x768xf32) + add_133 = paddle._C_ops.add(add_128, reshape_267) + del add_128, reshape_267 + + # pd_op.layer_norm: (-1x576x768xf32, -1x576xf32, -1x576xf32) <- (-1x576x768xf32, 768xf32, 768xf32) + layer_norm_114, layer_norm_115, layer_norm_116 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_133, parameter_84, parameter_83, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_83, parameter_84 + + # pd_op.matmul: (-1x576x3072xf32) <- (-1x576x768xf32, 768x3072xf32) + matmul_108 = paddle._C_ops.matmul(layer_norm_114, parameter_82, False, False) + del layer_norm_114, parameter_82 + + # pd_op.add: (-1x576x3072xf32) <- (-1x576x3072xf32, 3072xf32) + add_134 = paddle._C_ops.add(matmul_108, parameter_81) + del matmul_108, parameter_81 + + # pd_op.gelu: (-1x576x3072xf32) <- (-1x576x3072xf32) + gelu_17 = paddle._C_ops.gelu(add_134, False) + del add_134 + + # pd_op.matmul: (-1x576x768xf32) <- (-1x576x3072xf32, 3072x768xf32) + matmul_109 = paddle._C_ops.matmul(gelu_17, parameter_80, False, False) + del gelu_17, parameter_80 + + # pd_op.add: (-1x576x768xf32) <- (-1x576x768xf32, 768xf32) + add_135 = paddle._C_ops.add(matmul_109, parameter_79) + del matmul_109, parameter_79 + + # pd_op.add: (-1x576x768xf32) <- (-1x576x768xf32, -1x576x768xf32) + add_136 = paddle._C_ops.add(add_133, add_135) + del add_133, add_135 + + # pd_op.shape64: (3xi64) <- (-1x576x768xf32) + shape64_71 = paddle._C_ops.shape64(add_136) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_125 = paddle._C_ops.slice( + shape64_71, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_71 + + # pd_op.layer_norm: (-1x576x768xf32, -1x576xf32, -1x576xf32) <- (-1x576x768xf32, 768xf32, 768xf32) + layer_norm_117, layer_norm_118, layer_norm_119 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_136, parameter_78, parameter_77, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_77, parameter_78 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_116 = [slice_125, full_29, full_29, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_114 = paddle._C_ops.stack(combine_116, 0) + del combine_116 + + # pd_op.reshape: (-1x24x24x768xf32) <- (-1x576x768xf32, 4xi64) + reshape_268 = paddle._C_ops.reshape(layer_norm_117, stack_114) + del layer_norm_117, stack_114 + + # pd_op.shape64: (4xi64) <- (-1x24x24x768xf32) + shape64_72 = paddle._C_ops.shape64(reshape_268) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_126 = paddle._C_ops.slice( + shape64_72, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_72 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_117 = [slice_126, full_30, full_3, full_30, full_3, full_18] + del slice_126 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_115 = paddle._C_ops.stack(combine_117, 0) + del combine_117 + + # pd_op.reshape: (-1x2x12x2x12x768xf32) <- (-1x24x24x768xf32, 6xi64) + reshape_269 = paddle._C_ops.reshape(reshape_268, stack_115) + del reshape_268, stack_115 + + # pd_op.transpose: (-1x2x2x12x12x768xf32) <- (-1x2x12x2x12x768xf32) + transpose_118 = paddle._C_ops.transpose(reshape_269, [0, 1, 3, 2, 4, 5]) + del reshape_269 + + # pd_op.reshape: (-1x12x12x768xf32) <- (-1x2x2x12x12x768xf32, 4xi64) + reshape_270 = paddle._C_ops.reshape(transpose_118, full_int_array_38) + del transpose_118 + + # pd_op.reshape: (-1x144x768xf32) <- (-1x12x12x768xf32, 3xi64) + reshape_271 = paddle._C_ops.reshape(reshape_270, full_int_array_39) + del reshape_270 + + # pd_op.shape64: (3xi64) <- (-1x144x768xf32) + shape64_73 = paddle._C_ops.shape64(reshape_271) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_127 = paddle._C_ops.slice( + shape64_73, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_73 + + # pd_op.matmul: (-1x144x2304xf32) <- (-1x144x768xf32, 768x2304xf32) + matmul_110 = paddle._C_ops.matmul(reshape_271, parameter_76, False, False) + del parameter_76, reshape_271 + + # pd_op.add: (-1x144x2304xf32) <- (-1x144x2304xf32, 2304xf32) + add_137 = paddle._C_ops.add(matmul_110, parameter_75) + del matmul_110, parameter_75 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_118 = [slice_127, full_4, full_5, full_29, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_116 = paddle._C_ops.stack(combine_118, 0) + del combine_118 + + # pd_op.reshape: (-1x144x3x24x32xf32) <- (-1x144x2304xf32, 5xi64) + reshape_272 = paddle._C_ops.reshape(add_137, stack_116) + del add_137, stack_116 + + # pd_op.transpose: (3x-1x24x144x32xf32) <- (-1x144x3x24x32xf32) + transpose_119 = paddle._C_ops.transpose(reshape_272, [2, 0, 3, 1, 4]) + del reshape_272 + + # pd_op.slice: (-1x24x144x32xf32) <- (3x-1x24x144x32xf32, 1xi64, 1xi64) + slice_128 = paddle._C_ops.slice( + transpose_119, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x24x144x32xf32) <- (3x-1x24x144x32xf32, 1xi64, 1xi64) + slice_129 = paddle._C_ops.slice( + transpose_119, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x24x144x32xf32) <- (3x-1x24x144x32xf32, 1xi64, 1xi64) + slice_130 = paddle._C_ops.slice( + transpose_119, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_119 + + # pd_op.scale: (-1x24x144x32xf32) <- (-1x24x144x32xf32, 1xf32) + scale_18 = paddle._C_ops.scale(slice_128, full_8, float("0"), True) + del slice_128 + + # pd_op.transpose: (-1x24x32x144xf32) <- (-1x24x144x32xf32) + transpose_120 = paddle._C_ops.transpose(slice_129, [0, 1, 3, 2]) + del slice_129 + + # pd_op.matmul: (-1x24x144x144xf32) <- (-1x24x144x32xf32, -1x24x32x144xf32) + matmul_111 = paddle._C_ops.matmul(scale_18, transpose_120, False, False) + del scale_18, transpose_120 + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_273 = paddle._C_ops.reshape(data_37, full_int_array_7) + del data_37 + + # pd_op.index_select: (20736x24xf32) <- (529x24xf32, 20736xi64) + index_select_18 = paddle._C_ops.index_select(data_38, reshape_273, 0) + del data_38, reshape_273 + + # pd_op.reshape: (144x144x24xf32) <- (20736x24xf32, 3xi64) + reshape_274 = paddle._C_ops.reshape(index_select_18, full_int_array_8) + del index_select_18 + + # pd_op.transpose: (24x144x144xf32) <- (144x144x24xf32) + transpose_121 = paddle._C_ops.transpose(reshape_274, [2, 0, 1]) + del reshape_274 + + # pd_op.unsqueeze: (1x24x144x144xf32) <- (24x144x144xf32, 1xi64) + unsqueeze_54 = paddle._C_ops.unsqueeze(transpose_121, full_int_array_0) + del transpose_121 + + # pd_op.add: (-1x24x144x144xf32) <- (-1x24x144x144xf32, 1x24x144x144xf32) + add_138 = paddle._C_ops.add(matmul_111, unsqueeze_54) + del matmul_111, unsqueeze_54 + + # pd_op.softmax: (-1x24x144x144xf32) <- (-1x24x144x144xf32) + softmax_18 = paddle._C_ops.softmax(add_138, -1) + del add_138 + + # pd_op.matmul: (-1x24x144x32xf32) <- (-1x24x144x144xf32, -1x24x144x32xf32) + matmul_112 = paddle._C_ops.matmul(softmax_18, slice_130, False, False) + del slice_130, softmax_18 + + # pd_op.transpose: (-1x144x24x32xf32) <- (-1x24x144x32xf32) + transpose_122 = paddle._C_ops.transpose(matmul_112, [0, 2, 1, 3]) + del matmul_112 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_119 = [slice_127, full_4, full_18] + del slice_127 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_117 = paddle._C_ops.stack(combine_119, 0) + del combine_119 + + # pd_op.reshape: (-1x144x768xf32) <- (-1x144x24x32xf32, 3xi64) + reshape_275 = paddle._C_ops.reshape(transpose_122, stack_117) + del stack_117, transpose_122 + + # pd_op.matmul: (-1x144x768xf32) <- (-1x144x768xf32, 768x768xf32) + matmul_113 = paddle._C_ops.matmul(reshape_275, parameter_74, False, False) + del parameter_74, reshape_275 + + # pd_op.add: (-1x144x768xf32) <- (-1x144x768xf32, 768xf32) + add_139 = paddle._C_ops.add(matmul_113, parameter_73) + del matmul_113, parameter_73 + + # pd_op.reshape: (-1x12x12x768xf32) <- (-1x144x768xf32, 4xi64) + reshape_276 = paddle._C_ops.reshape(add_139, full_int_array_38) + del add_139 + + # pd_op.reshape: (-1x2x2x12x12x768xf32) <- (-1x12x12x768xf32, 6xi64) + reshape_277 = paddle._C_ops.reshape(reshape_276, full_int_array_40) + del reshape_276 + + # pd_op.transpose: (-1x2x12x2x12x768xf32) <- (-1x2x2x12x12x768xf32) + transpose_123 = paddle._C_ops.transpose(reshape_277, [0, 1, 3, 2, 4, 5]) + del reshape_277 + + # pd_op.reshape: (-1x24x24x768xf32) <- (-1x2x12x2x12x768xf32, 4xi64) + reshape_278 = paddle._C_ops.reshape(transpose_123, full_int_array_41) + del transpose_123 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_120 = [slice_125, full_31, full_18] + del slice_125 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_118 = paddle._C_ops.stack(combine_120, 0) + del combine_120 + + # pd_op.reshape: (-1x576x768xf32) <- (-1x24x24x768xf32, 3xi64) + reshape_279 = paddle._C_ops.reshape(reshape_278, stack_118) + del reshape_278, stack_118 + + # pd_op.add: (-1x576x768xf32) <- (-1x576x768xf32, -1x576x768xf32) + add_140 = paddle._C_ops.add(add_136, reshape_279) + del add_136, reshape_279 + + # pd_op.layer_norm: (-1x576x768xf32, -1x576xf32, -1x576xf32) <- (-1x576x768xf32, 768xf32, 768xf32) + layer_norm_120, layer_norm_121, layer_norm_122 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_140, parameter_72, parameter_71, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_71, parameter_72 + + # pd_op.matmul: (-1x576x3072xf32) <- (-1x576x768xf32, 768x3072xf32) + matmul_114 = paddle._C_ops.matmul(layer_norm_120, parameter_70, False, False) + del layer_norm_120, parameter_70 + + # pd_op.add: (-1x576x3072xf32) <- (-1x576x3072xf32, 3072xf32) + add_141 = paddle._C_ops.add(matmul_114, parameter_69) + del matmul_114, parameter_69 + + # pd_op.gelu: (-1x576x3072xf32) <- (-1x576x3072xf32) + gelu_18 = paddle._C_ops.gelu(add_141, False) + del add_141 + + # pd_op.matmul: (-1x576x768xf32) <- (-1x576x3072xf32, 3072x768xf32) + matmul_115 = paddle._C_ops.matmul(gelu_18, parameter_68, False, False) + del gelu_18, parameter_68 + + # pd_op.add: (-1x576x768xf32) <- (-1x576x768xf32, 768xf32) + add_142 = paddle._C_ops.add(matmul_115, parameter_67) + del matmul_115, parameter_67 + + # pd_op.add: (-1x576x768xf32) <- (-1x576x768xf32, -1x576x768xf32) + add_143 = paddle._C_ops.add(add_140, add_142) + del add_140, add_142 + + # pd_op.shape64: (3xi64) <- (-1x576x768xf32) + shape64_74 = paddle._C_ops.shape64(add_143) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_131 = paddle._C_ops.slice( + shape64_74, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_74 + + # pd_op.layer_norm: (-1x576x768xf32, -1x576xf32, -1x576xf32) <- (-1x576x768xf32, 768xf32, 768xf32) + layer_norm_123, layer_norm_124, layer_norm_125 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_143, parameter_66, parameter_65, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_65, parameter_66 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_121 = [slice_131, full_29, full_29, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_119 = paddle._C_ops.stack(combine_121, 0) + del combine_121 + + # pd_op.reshape: (-1x24x24x768xf32) <- (-1x576x768xf32, 4xi64) + reshape_280 = paddle._C_ops.reshape(layer_norm_123, stack_119) + del layer_norm_123, stack_119 + + # pd_op.shape64: (4xi64) <- (-1x24x24x768xf32) + shape64_75 = paddle._C_ops.shape64(reshape_280) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_132 = paddle._C_ops.slice( + shape64_75, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_75 + + # pd_op.roll: (-1x24x24x768xf32) <- (-1x24x24x768xf32, 2xi64) + roll_18 = paddle._C_ops.roll(reshape_280, full_int_array_11, [1, 2]) + del reshape_280 + + # pd_op.shape64: (4xi64) <- (-1x24x24x768xf32) + shape64_76 = paddle._C_ops.shape64(roll_18) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_133 = paddle._C_ops.slice( + shape64_76, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_76 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_122 = [slice_133, full_30, full_3, full_30, full_3, full_18] + del slice_133 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_120 = paddle._C_ops.stack(combine_122, 0) + del combine_122 + + # pd_op.reshape: (-1x2x12x2x12x768xf32) <- (-1x24x24x768xf32, 6xi64) + reshape_281 = paddle._C_ops.reshape(roll_18, stack_120) + del roll_18, stack_120 + + # pd_op.transpose: (-1x2x2x12x12x768xf32) <- (-1x2x12x2x12x768xf32) + transpose_124 = paddle._C_ops.transpose(reshape_281, [0, 1, 3, 2, 4, 5]) + del reshape_281 + + # pd_op.reshape: (-1x12x12x768xf32) <- (-1x2x2x12x12x768xf32, 4xi64) + reshape_282 = paddle._C_ops.reshape(transpose_124, full_int_array_38) + del transpose_124 + + # pd_op.reshape: (-1x144x768xf32) <- (-1x12x12x768xf32, 3xi64) + reshape_283 = paddle._C_ops.reshape(reshape_282, full_int_array_39) + del reshape_282 + + # pd_op.full: (1x24x24x1xf32) <- () + full_42 = paddle._C_ops.full( + [1, 24, 24, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__84 = paddle._C_ops.set_value_( + full_42, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_42 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__85 = paddle._C_ops.set_value_( + set_value__84, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__84 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__86 = paddle._C_ops.set_value_( + set_value__85, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__85 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__87 = paddle._C_ops.set_value_( + set_value__86, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__86 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__88 = paddle._C_ops.set_value_( + set_value__87, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__87 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__89 = paddle._C_ops.set_value_( + set_value__88, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__88 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__90 = paddle._C_ops.set_value_( + set_value__89, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__89 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__91 = paddle._C_ops.set_value_( + set_value__90, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__90 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__9 = paddle._C_ops.set_value_( + set_value__91, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__91 + + # pd_op.reshape: (1x2x12x2x12x1xf32) <- (1x24x24x1xf32, 6xi64) + reshape_284 = paddle._C_ops.reshape(set_value__9, full_int_array_42) + + # pd_op.transpose: (1x2x2x12x12x1xf32) <- (1x2x12x2x12x1xf32) + transpose_125 = paddle._C_ops.transpose(reshape_284, [0, 1, 3, 2, 4, 5]) + del reshape_284 + + # pd_op.reshape: (4x12x12x1xf32) <- (1x2x2x12x12x1xf32, 4xi64) + reshape_285 = paddle._C_ops.reshape(transpose_125, full_int_array_27) + del transpose_125 + + # pd_op.reshape: (4x144xf32) <- (4x12x12x1xf32, 2xi64) + reshape_286 = paddle._C_ops.reshape(reshape_285, full_int_array_28) + del reshape_285 + + # pd_op.unsqueeze: (4x1x144xf32) <- (4x144xf32, 1xi64) + unsqueeze_55 = paddle._C_ops.unsqueeze(reshape_286, full_int_array_1) + + # pd_op.unsqueeze: (4x144x1xf32) <- (4x144xf32, 1xi64) + unsqueeze_56 = paddle._C_ops.unsqueeze(reshape_286, full_int_array_5) + del reshape_286 + + # pd_op.subtract: (4x144x144xf32) <- (4x1x144xf32, 4x144x1xf32) + subtract_9 = paddle._C_ops.subtract(unsqueeze_55, unsqueeze_56) + del unsqueeze_55, unsqueeze_56 + + # pd_op.not_equal: (4x144x144xb) <- (4x144x144xf32, xf32) + not_equal_9 = paddle._C_ops.not_equal(subtract_9, full_11) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_18 = paddle._C_ops.where(not_equal_9, full_33, subtract_9) + del not_equal_9, subtract_9 + + # pd_op.equal: (4x144x144xb) <- (4x144x144xf32, xf32) + equal_9 = paddle._C_ops.equal(where_18, full_11) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_19 = paddle._C_ops.where(equal_9, full_34, where_18) + del equal_9, where_18 + + # pd_op.shape64: (3xi64) <- (-1x144x768xf32) + shape64_77 = paddle._C_ops.shape64(reshape_283) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_134 = paddle._C_ops.slice( + shape64_77, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_77 + + # pd_op.matmul: (-1x144x2304xf32) <- (-1x144x768xf32, 768x2304xf32) + matmul_116 = paddle._C_ops.matmul(reshape_283, parameter_64, False, False) + del parameter_64, reshape_283 + + # pd_op.add: (-1x144x2304xf32) <- (-1x144x2304xf32, 2304xf32) + add_144 = paddle._C_ops.add(matmul_116, parameter_63) + del matmul_116, parameter_63 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_123 = [slice_134, full_4, full_5, full_29, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_121 = paddle._C_ops.stack(combine_123, 0) + del combine_123 + + # pd_op.reshape: (-1x144x3x24x32xf32) <- (-1x144x2304xf32, 5xi64) + reshape_287 = paddle._C_ops.reshape(add_144, stack_121) + del add_144, stack_121 + + # pd_op.transpose: (3x-1x24x144x32xf32) <- (-1x144x3x24x32xf32) + transpose_126 = paddle._C_ops.transpose(reshape_287, [2, 0, 3, 1, 4]) + del reshape_287 + + # pd_op.slice: (-1x24x144x32xf32) <- (3x-1x24x144x32xf32, 1xi64, 1xi64) + slice_135 = paddle._C_ops.slice( + transpose_126, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x24x144x32xf32) <- (3x-1x24x144x32xf32, 1xi64, 1xi64) + slice_136 = paddle._C_ops.slice( + transpose_126, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x24x144x32xf32) <- (3x-1x24x144x32xf32, 1xi64, 1xi64) + slice_137 = paddle._C_ops.slice( + transpose_126, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_126 + + # pd_op.scale: (-1x24x144x32xf32) <- (-1x24x144x32xf32, 1xf32) + scale_19 = paddle._C_ops.scale(slice_135, full_8, float("0"), True) + del slice_135 + + # pd_op.transpose: (-1x24x32x144xf32) <- (-1x24x144x32xf32) + transpose_127 = paddle._C_ops.transpose(slice_136, [0, 1, 3, 2]) + del slice_136 + + # pd_op.matmul: (-1x24x144x144xf32) <- (-1x24x144x32xf32, -1x24x32x144xf32) + matmul_117 = paddle._C_ops.matmul(scale_19, transpose_127, False, False) + del scale_19, transpose_127 + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_288 = paddle._C_ops.reshape(data_39, full_int_array_7) + del data_39 + + # pd_op.index_select: (20736x24xf32) <- (529x24xf32, 20736xi64) + index_select_19 = paddle._C_ops.index_select(data_40, reshape_288, 0) + del data_40, reshape_288 + + # pd_op.reshape: (144x144x24xf32) <- (20736x24xf32, 3xi64) + reshape_289 = paddle._C_ops.reshape(index_select_19, full_int_array_8) + del index_select_19 + + # pd_op.transpose: (24x144x144xf32) <- (144x144x24xf32) + transpose_128 = paddle._C_ops.transpose(reshape_289, [2, 0, 1]) + del reshape_289 + + # pd_op.unsqueeze: (1x24x144x144xf32) <- (24x144x144xf32, 1xi64) + unsqueeze_57 = paddle._C_ops.unsqueeze(transpose_128, full_int_array_0) + del transpose_128 + + # pd_op.add: (-1x24x144x144xf32) <- (-1x24x144x144xf32, 1x24x144x144xf32) + add_145 = paddle._C_ops.add(matmul_117, unsqueeze_57) + del matmul_117, unsqueeze_57 + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_9 = paddle._C_ops.floor_divide(slice_134, full_35) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_124 = [floor_divide_9, full_21, full_29, full_4, full_4] + del floor_divide_9 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_122 = paddle._C_ops.stack(combine_124, 0) + del combine_124 + + # pd_op.reshape: (-1x4x24x144x144xf32) <- (-1x24x144x144xf32, 5xi64) + reshape_290 = paddle._C_ops.reshape(add_145, stack_122) + del add_145, stack_122 + + # pd_op.unsqueeze: (4x1x144x144xf32) <- (4x144x144xf32, 1xi64) + unsqueeze_58 = paddle._C_ops.unsqueeze(where_19, full_int_array_1) + del where_19 + + # pd_op.unsqueeze: (1x4x1x144x144xf32) <- (4x1x144x144xf32, 1xi64) + unsqueeze_59 = paddle._C_ops.unsqueeze(unsqueeze_58, full_int_array_0) + del unsqueeze_58 + + # pd_op.add: (-1x4x24x144x144xf32) <- (-1x4x24x144x144xf32, 1x4x1x144x144xf32) + add_146 = paddle._C_ops.add(reshape_290, unsqueeze_59) + del reshape_290, unsqueeze_59 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_125 = [slice_134, full_29, full_4, full_4] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_123 = paddle._C_ops.stack(combine_125, 0) + del combine_125 + + # pd_op.reshape: (-1x24x144x144xf32) <- (-1x4x24x144x144xf32, 4xi64) + reshape_291 = paddle._C_ops.reshape(add_146, stack_123) + del add_146, stack_123 + + # pd_op.softmax: (-1x24x144x144xf32) <- (-1x24x144x144xf32) + softmax_19 = paddle._C_ops.softmax(reshape_291, -1) + del reshape_291 + + # pd_op.matmul: (-1x24x144x32xf32) <- (-1x24x144x144xf32, -1x24x144x32xf32) + matmul_118 = paddle._C_ops.matmul(softmax_19, slice_137, False, False) + del slice_137, softmax_19 + + # pd_op.transpose: (-1x144x24x32xf32) <- (-1x24x144x32xf32) + transpose_129 = paddle._C_ops.transpose(matmul_118, [0, 2, 1, 3]) + del matmul_118 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_126 = [slice_134, full_4, full_18] + del slice_134 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_124 = paddle._C_ops.stack(combine_126, 0) + del combine_126 + + # pd_op.reshape: (-1x144x768xf32) <- (-1x144x24x32xf32, 3xi64) + reshape_292 = paddle._C_ops.reshape(transpose_129, stack_124) + del stack_124, transpose_129 + + # pd_op.matmul: (-1x144x768xf32) <- (-1x144x768xf32, 768x768xf32) + matmul_119 = paddle._C_ops.matmul(reshape_292, parameter_62, False, False) + del parameter_62, reshape_292 + + # pd_op.add: (-1x144x768xf32) <- (-1x144x768xf32, 768xf32) + add_147 = paddle._C_ops.add(matmul_119, parameter_61) + del matmul_119, parameter_61 + + # pd_op.reshape: (-1x12x12x768xf32) <- (-1x144x768xf32, 4xi64) + reshape_293 = paddle._C_ops.reshape(add_147, full_int_array_38) + del add_147 + + # pd_op.reshape: (-1x2x2x12x12x768xf32) <- (-1x12x12x768xf32, 6xi64) + reshape_294 = paddle._C_ops.reshape(reshape_293, full_int_array_40) + del reshape_293 + + # pd_op.transpose: (-1x2x12x2x12x768xf32) <- (-1x2x2x12x12x768xf32) + transpose_130 = paddle._C_ops.transpose(reshape_294, [0, 1, 3, 2, 4, 5]) + del reshape_294 + + # pd_op.reshape: (-1x24x24x768xf32) <- (-1x2x12x2x12x768xf32, 4xi64) + reshape_295 = paddle._C_ops.reshape(transpose_130, full_int_array_41) + del transpose_130 + + # pd_op.roll: (-1x24x24x768xf32) <- (-1x24x24x768xf32, 2xi64) + roll_19 = paddle._C_ops.roll(reshape_295, full_int_array_29, [1, 2]) + del reshape_295 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_127 = [slice_131, full_31, full_18] + del slice_131 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_125 = paddle._C_ops.stack(combine_127, 0) + del combine_127 + + # pd_op.reshape: (-1x576x768xf32) <- (-1x24x24x768xf32, 3xi64) + reshape_296 = paddle._C_ops.reshape(roll_19, stack_125) + del roll_19, stack_125 + + # pd_op.add: (-1x576x768xf32) <- (-1x576x768xf32, -1x576x768xf32) + add_148 = paddle._C_ops.add(add_143, reshape_296) + del add_143, reshape_296 + + # pd_op.layer_norm: (-1x576x768xf32, -1x576xf32, -1x576xf32) <- (-1x576x768xf32, 768xf32, 768xf32) + layer_norm_126, layer_norm_127, layer_norm_128 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_148, parameter_60, parameter_59, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_59, parameter_60 + + # pd_op.matmul: (-1x576x3072xf32) <- (-1x576x768xf32, 768x3072xf32) + matmul_120 = paddle._C_ops.matmul(layer_norm_126, parameter_58, False, False) + del layer_norm_126, parameter_58 + + # pd_op.add: (-1x576x3072xf32) <- (-1x576x3072xf32, 3072xf32) + add_149 = paddle._C_ops.add(matmul_120, parameter_57) + del matmul_120, parameter_57 + + # pd_op.gelu: (-1x576x3072xf32) <- (-1x576x3072xf32) + gelu_19 = paddle._C_ops.gelu(add_149, False) + del add_149 + + # pd_op.matmul: (-1x576x768xf32) <- (-1x576x3072xf32, 3072x768xf32) + matmul_121 = paddle._C_ops.matmul(gelu_19, parameter_56, False, False) + del gelu_19, parameter_56 + + # pd_op.add: (-1x576x768xf32) <- (-1x576x768xf32, 768xf32) + add_150 = paddle._C_ops.add(matmul_121, parameter_55) + del matmul_121, parameter_55 + + # pd_op.add: (-1x576x768xf32) <- (-1x576x768xf32, -1x576x768xf32) + add_151 = paddle._C_ops.add(add_148, add_150) + del add_148, add_150 + + # pd_op.shape64: (3xi64) <- (-1x576x768xf32) + shape64_78 = paddle._C_ops.shape64(add_151) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_138 = paddle._C_ops.slice( + shape64_78, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_78 + + # pd_op.layer_norm: (-1x576x768xf32, -1x576xf32, -1x576xf32) <- (-1x576x768xf32, 768xf32, 768xf32) + layer_norm_129, layer_norm_130, layer_norm_131 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_151, parameter_54, parameter_53, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_53, parameter_54 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_128 = [slice_138, full_29, full_29, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_126 = paddle._C_ops.stack(combine_128, 0) + del combine_128 + + # pd_op.reshape: (-1x24x24x768xf32) <- (-1x576x768xf32, 4xi64) + reshape_297 = paddle._C_ops.reshape(layer_norm_129, stack_126) + del layer_norm_129, stack_126 + + # pd_op.shape64: (4xi64) <- (-1x24x24x768xf32) + shape64_79 = paddle._C_ops.shape64(reshape_297) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_139 = paddle._C_ops.slice( + shape64_79, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_79 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_129 = [slice_139, full_30, full_3, full_30, full_3, full_18] + del slice_139 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_127 = paddle._C_ops.stack(combine_129, 0) + del combine_129 + + # pd_op.reshape: (-1x2x12x2x12x768xf32) <- (-1x24x24x768xf32, 6xi64) + reshape_298 = paddle._C_ops.reshape(reshape_297, stack_127) + del reshape_297, stack_127 + + # pd_op.transpose: (-1x2x2x12x12x768xf32) <- (-1x2x12x2x12x768xf32) + transpose_131 = paddle._C_ops.transpose(reshape_298, [0, 1, 3, 2, 4, 5]) + del reshape_298 + + # pd_op.reshape: (-1x12x12x768xf32) <- (-1x2x2x12x12x768xf32, 4xi64) + reshape_299 = paddle._C_ops.reshape(transpose_131, full_int_array_38) + del transpose_131 + + # pd_op.reshape: (-1x144x768xf32) <- (-1x12x12x768xf32, 3xi64) + reshape_300 = paddle._C_ops.reshape(reshape_299, full_int_array_39) + del reshape_299 + + # pd_op.shape64: (3xi64) <- (-1x144x768xf32) + shape64_80 = paddle._C_ops.shape64(reshape_300) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_140 = paddle._C_ops.slice( + shape64_80, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_80 + + # pd_op.matmul: (-1x144x2304xf32) <- (-1x144x768xf32, 768x2304xf32) + matmul_122 = paddle._C_ops.matmul(reshape_300, parameter_52, False, False) + del parameter_52, reshape_300 + + # pd_op.add: (-1x144x2304xf32) <- (-1x144x2304xf32, 2304xf32) + add_152 = paddle._C_ops.add(matmul_122, parameter_51) + del matmul_122, parameter_51 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_130 = [slice_140, full_4, full_5, full_29, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_128 = paddle._C_ops.stack(combine_130, 0) + del combine_130 + + # pd_op.reshape: (-1x144x3x24x32xf32) <- (-1x144x2304xf32, 5xi64) + reshape_301 = paddle._C_ops.reshape(add_152, stack_128) + del add_152, stack_128 + + # pd_op.transpose: (3x-1x24x144x32xf32) <- (-1x144x3x24x32xf32) + transpose_132 = paddle._C_ops.transpose(reshape_301, [2, 0, 3, 1, 4]) + del reshape_301 + + # pd_op.slice: (-1x24x144x32xf32) <- (3x-1x24x144x32xf32, 1xi64, 1xi64) + slice_141 = paddle._C_ops.slice( + transpose_132, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x24x144x32xf32) <- (3x-1x24x144x32xf32, 1xi64, 1xi64) + slice_142 = paddle._C_ops.slice( + transpose_132, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x24x144x32xf32) <- (3x-1x24x144x32xf32, 1xi64, 1xi64) + slice_143 = paddle._C_ops.slice( + transpose_132, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_132 + + # pd_op.scale: (-1x24x144x32xf32) <- (-1x24x144x32xf32, 1xf32) + scale_20 = paddle._C_ops.scale(slice_141, full_8, float("0"), True) + del slice_141 + + # pd_op.transpose: (-1x24x32x144xf32) <- (-1x24x144x32xf32) + transpose_133 = paddle._C_ops.transpose(slice_142, [0, 1, 3, 2]) + del slice_142 + + # pd_op.matmul: (-1x24x144x144xf32) <- (-1x24x144x32xf32, -1x24x32x144xf32) + matmul_123 = paddle._C_ops.matmul(scale_20, transpose_133, False, False) + del scale_20, transpose_133 + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_302 = paddle._C_ops.reshape(data_41, full_int_array_7) + del data_41 + + # pd_op.index_select: (20736x24xf32) <- (529x24xf32, 20736xi64) + index_select_20 = paddle._C_ops.index_select(data_42, reshape_302, 0) + del data_42, reshape_302 + + # pd_op.reshape: (144x144x24xf32) <- (20736x24xf32, 3xi64) + reshape_303 = paddle._C_ops.reshape(index_select_20, full_int_array_8) + del index_select_20 + + # pd_op.transpose: (24x144x144xf32) <- (144x144x24xf32) + transpose_134 = paddle._C_ops.transpose(reshape_303, [2, 0, 1]) + del reshape_303 + + # pd_op.unsqueeze: (1x24x144x144xf32) <- (24x144x144xf32, 1xi64) + unsqueeze_60 = paddle._C_ops.unsqueeze(transpose_134, full_int_array_0) + del transpose_134 + + # pd_op.add: (-1x24x144x144xf32) <- (-1x24x144x144xf32, 1x24x144x144xf32) + add_153 = paddle._C_ops.add(matmul_123, unsqueeze_60) + del matmul_123, unsqueeze_60 + + # pd_op.softmax: (-1x24x144x144xf32) <- (-1x24x144x144xf32) + softmax_20 = paddle._C_ops.softmax(add_153, -1) + del add_153 + + # pd_op.matmul: (-1x24x144x32xf32) <- (-1x24x144x144xf32, -1x24x144x32xf32) + matmul_124 = paddle._C_ops.matmul(softmax_20, slice_143, False, False) + del slice_143, softmax_20 + + # pd_op.transpose: (-1x144x24x32xf32) <- (-1x24x144x32xf32) + transpose_135 = paddle._C_ops.transpose(matmul_124, [0, 2, 1, 3]) + del matmul_124 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_131 = [slice_140, full_4, full_18] + del slice_140 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_129 = paddle._C_ops.stack(combine_131, 0) + del combine_131 + + # pd_op.reshape: (-1x144x768xf32) <- (-1x144x24x32xf32, 3xi64) + reshape_304 = paddle._C_ops.reshape(transpose_135, stack_129) + del stack_129, transpose_135 + + # pd_op.matmul: (-1x144x768xf32) <- (-1x144x768xf32, 768x768xf32) + matmul_125 = paddle._C_ops.matmul(reshape_304, parameter_50, False, False) + del parameter_50, reshape_304 + + # pd_op.add: (-1x144x768xf32) <- (-1x144x768xf32, 768xf32) + add_154 = paddle._C_ops.add(matmul_125, parameter_49) + del matmul_125, parameter_49 + + # pd_op.reshape: (-1x12x12x768xf32) <- (-1x144x768xf32, 4xi64) + reshape_305 = paddle._C_ops.reshape(add_154, full_int_array_38) + del add_154 + + # pd_op.reshape: (-1x2x2x12x12x768xf32) <- (-1x12x12x768xf32, 6xi64) + reshape_306 = paddle._C_ops.reshape(reshape_305, full_int_array_40) + del reshape_305 + + # pd_op.transpose: (-1x2x12x2x12x768xf32) <- (-1x2x2x12x12x768xf32) + transpose_136 = paddle._C_ops.transpose(reshape_306, [0, 1, 3, 2, 4, 5]) + del reshape_306 + + # pd_op.reshape: (-1x24x24x768xf32) <- (-1x2x12x2x12x768xf32, 4xi64) + reshape_307 = paddle._C_ops.reshape(transpose_136, full_int_array_41) + del transpose_136 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_132 = [slice_138, full_31, full_18] + del slice_138 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_130 = paddle._C_ops.stack(combine_132, 0) + del combine_132 + + # pd_op.reshape: (-1x576x768xf32) <- (-1x24x24x768xf32, 3xi64) + reshape_308 = paddle._C_ops.reshape(reshape_307, stack_130) + del reshape_307, stack_130 + + # pd_op.add: (-1x576x768xf32) <- (-1x576x768xf32, -1x576x768xf32) + add_155 = paddle._C_ops.add(add_151, reshape_308) + del add_151, reshape_308 + + # pd_op.layer_norm: (-1x576x768xf32, -1x576xf32, -1x576xf32) <- (-1x576x768xf32, 768xf32, 768xf32) + layer_norm_132, layer_norm_133, layer_norm_134 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_155, parameter_48, parameter_47, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_47, parameter_48 + + # pd_op.matmul: (-1x576x3072xf32) <- (-1x576x768xf32, 768x3072xf32) + matmul_126 = paddle._C_ops.matmul(layer_norm_132, parameter_46, False, False) + del layer_norm_132, parameter_46 + + # pd_op.add: (-1x576x3072xf32) <- (-1x576x3072xf32, 3072xf32) + add_156 = paddle._C_ops.add(matmul_126, parameter_45) + del matmul_126, parameter_45 + + # pd_op.gelu: (-1x576x3072xf32) <- (-1x576x3072xf32) + gelu_20 = paddle._C_ops.gelu(add_156, False) + del add_156 + + # pd_op.matmul: (-1x576x768xf32) <- (-1x576x3072xf32, 3072x768xf32) + matmul_127 = paddle._C_ops.matmul(gelu_20, parameter_44, False, False) + del gelu_20, parameter_44 + + # pd_op.add: (-1x576x768xf32) <- (-1x576x768xf32, 768xf32) + add_157 = paddle._C_ops.add(matmul_127, parameter_43) + del matmul_127, parameter_43 + + # pd_op.add: (-1x576x768xf32) <- (-1x576x768xf32, -1x576x768xf32) + add_158 = paddle._C_ops.add(add_155, add_157) + del add_155, add_157 + + # pd_op.shape64: (3xi64) <- (-1x576x768xf32) + shape64_81 = paddle._C_ops.shape64(add_158) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_144 = paddle._C_ops.slice( + shape64_81, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_81 + + # pd_op.layer_norm: (-1x576x768xf32, -1x576xf32, -1x576xf32) <- (-1x576x768xf32, 768xf32, 768xf32) + layer_norm_135, layer_norm_136, layer_norm_137 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_158, parameter_42, parameter_41, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_41, parameter_42 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_133 = [slice_144, full_29, full_29, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_131 = paddle._C_ops.stack(combine_133, 0) + del combine_133 + + # pd_op.reshape: (-1x24x24x768xf32) <- (-1x576x768xf32, 4xi64) + reshape_309 = paddle._C_ops.reshape(layer_norm_135, stack_131) + del layer_norm_135, stack_131 + + # pd_op.shape64: (4xi64) <- (-1x24x24x768xf32) + shape64_82 = paddle._C_ops.shape64(reshape_309) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_145 = paddle._C_ops.slice( + shape64_82, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_82 + + # pd_op.roll: (-1x24x24x768xf32) <- (-1x24x24x768xf32, 2xi64) + roll_20 = paddle._C_ops.roll(reshape_309, full_int_array_11, [1, 2]) + del reshape_309 + + # pd_op.shape64: (4xi64) <- (-1x24x24x768xf32) + shape64_83 = paddle._C_ops.shape64(roll_20) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_146 = paddle._C_ops.slice( + shape64_83, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_83 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_134 = [slice_146, full_30, full_3, full_30, full_3, full_18] + del full_30, slice_146 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_132 = paddle._C_ops.stack(combine_134, 0) + del combine_134 + + # pd_op.reshape: (-1x2x12x2x12x768xf32) <- (-1x24x24x768xf32, 6xi64) + reshape_310 = paddle._C_ops.reshape(roll_20, stack_132) + del roll_20, stack_132 + + # pd_op.transpose: (-1x2x2x12x12x768xf32) <- (-1x2x12x2x12x768xf32) + transpose_137 = paddle._C_ops.transpose(reshape_310, [0, 1, 3, 2, 4, 5]) + del reshape_310 + + # pd_op.reshape: (-1x12x12x768xf32) <- (-1x2x2x12x12x768xf32, 4xi64) + reshape_311 = paddle._C_ops.reshape(transpose_137, full_int_array_38) + del transpose_137 + + # pd_op.reshape: (-1x144x768xf32) <- (-1x12x12x768xf32, 3xi64) + reshape_312 = paddle._C_ops.reshape(reshape_311, full_int_array_39) + del full_int_array_39, reshape_311 + + # pd_op.full: (1x24x24x1xf32) <- () + full_43 = paddle._C_ops.full( + [1, 24, 24, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__92 = paddle._C_ops.set_value_( + full_43, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_43 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__93 = paddle._C_ops.set_value_( + set_value__92, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__92 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__94 = paddle._C_ops.set_value_( + set_value__93, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__93 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__95 = paddle._C_ops.set_value_( + set_value__94, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__94 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__96 = paddle._C_ops.set_value_( + set_value__95, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__95 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__97 = paddle._C_ops.set_value_( + set_value__96, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__96 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__98 = paddle._C_ops.set_value_( + set_value__97, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__97 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__99 = paddle._C_ops.set_value_( + set_value__98, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__98 + + # pd_op.set_value_: (1x24x24x1xf32) <- (1x24x24x1xf32, 2xi64, 2xi64, 2xi64) + set_value__10 = paddle._C_ops.set_value_( + set_value__99, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__99 + + # pd_op.reshape: (1x2x12x2x12x1xf32) <- (1x24x24x1xf32, 6xi64) + reshape_313 = paddle._C_ops.reshape(set_value__10, full_int_array_42) + del full_int_array_42 + + # pd_op.transpose: (1x2x2x12x12x1xf32) <- (1x2x12x2x12x1xf32) + transpose_138 = paddle._C_ops.transpose(reshape_313, [0, 1, 3, 2, 4, 5]) + del reshape_313 + + # pd_op.reshape: (4x12x12x1xf32) <- (1x2x2x12x12x1xf32, 4xi64) + reshape_314 = paddle._C_ops.reshape(transpose_138, full_int_array_27) + del transpose_138 + + # pd_op.reshape: (4x144xf32) <- (4x12x12x1xf32, 2xi64) + reshape_315 = paddle._C_ops.reshape(reshape_314, full_int_array_28) + del reshape_314 + + # pd_op.unsqueeze: (4x1x144xf32) <- (4x144xf32, 1xi64) + unsqueeze_61 = paddle._C_ops.unsqueeze(reshape_315, full_int_array_1) + + # pd_op.unsqueeze: (4x144x1xf32) <- (4x144xf32, 1xi64) + unsqueeze_62 = paddle._C_ops.unsqueeze(reshape_315, full_int_array_5) + del reshape_315 + + # pd_op.subtract: (4x144x144xf32) <- (4x1x144xf32, 4x144x1xf32) + subtract_10 = paddle._C_ops.subtract(unsqueeze_61, unsqueeze_62) + del unsqueeze_61, unsqueeze_62 + + # pd_op.not_equal: (4x144x144xb) <- (4x144x144xf32, xf32) + not_equal_10 = paddle._C_ops.not_equal(subtract_10, full_11) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_20 = paddle._C_ops.where(not_equal_10, full_33, subtract_10) + del full_33, not_equal_10, subtract_10 + + # pd_op.equal: (4x144x144xb) <- (4x144x144xf32, xf32) + equal_10 = paddle._C_ops.equal(where_20, full_11) + + # pd_op.where: (4x144x144xf32) <- (4x144x144xb, 4x144x144xf32, 4x144x144xf32) + where_21 = paddle._C_ops.where(equal_10, full_34, where_20) + del equal_10, full_34, where_20 + + # pd_op.shape64: (3xi64) <- (-1x144x768xf32) + shape64_84 = paddle._C_ops.shape64(reshape_312) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_147 = paddle._C_ops.slice( + shape64_84, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_84 + + # pd_op.matmul: (-1x144x2304xf32) <- (-1x144x768xf32, 768x2304xf32) + matmul_128 = paddle._C_ops.matmul(reshape_312, parameter_40, False, False) + del parameter_40, reshape_312 + + # pd_op.add: (-1x144x2304xf32) <- (-1x144x2304xf32, 2304xf32) + add_159 = paddle._C_ops.add(matmul_128, parameter_39) + del matmul_128, parameter_39 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_135 = [slice_147, full_4, full_5, full_29, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_133 = paddle._C_ops.stack(combine_135, 0) + del combine_135 + + # pd_op.reshape: (-1x144x3x24x32xf32) <- (-1x144x2304xf32, 5xi64) + reshape_316 = paddle._C_ops.reshape(add_159, stack_133) + del add_159, stack_133 + + # pd_op.transpose: (3x-1x24x144x32xf32) <- (-1x144x3x24x32xf32) + transpose_139 = paddle._C_ops.transpose(reshape_316, [2, 0, 3, 1, 4]) + del reshape_316 + + # pd_op.slice: (-1x24x144x32xf32) <- (3x-1x24x144x32xf32, 1xi64, 1xi64) + slice_148 = paddle._C_ops.slice( + transpose_139, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x24x144x32xf32) <- (3x-1x24x144x32xf32, 1xi64, 1xi64) + slice_149 = paddle._C_ops.slice( + transpose_139, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x24x144x32xf32) <- (3x-1x24x144x32xf32, 1xi64, 1xi64) + slice_150 = paddle._C_ops.slice( + transpose_139, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_139 + + # pd_op.scale: (-1x24x144x32xf32) <- (-1x24x144x32xf32, 1xf32) + scale_21 = paddle._C_ops.scale(slice_148, full_8, float("0"), True) + del slice_148 + + # pd_op.transpose: (-1x24x32x144xf32) <- (-1x24x144x32xf32) + transpose_140 = paddle._C_ops.transpose(slice_149, [0, 1, 3, 2]) + del slice_149 + + # pd_op.matmul: (-1x24x144x144xf32) <- (-1x24x144x32xf32, -1x24x32x144xf32) + matmul_129 = paddle._C_ops.matmul(scale_21, transpose_140, False, False) + del scale_21, transpose_140 + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_317 = paddle._C_ops.reshape(data_43, full_int_array_7) + del data_43 + + # pd_op.index_select: (20736x24xf32) <- (529x24xf32, 20736xi64) + index_select_21 = paddle._C_ops.index_select(data_44, reshape_317, 0) + del data_44, reshape_317 + + # pd_op.reshape: (144x144x24xf32) <- (20736x24xf32, 3xi64) + reshape_318 = paddle._C_ops.reshape(index_select_21, full_int_array_8) + del index_select_21 + + # pd_op.transpose: (24x144x144xf32) <- (144x144x24xf32) + transpose_141 = paddle._C_ops.transpose(reshape_318, [2, 0, 1]) + del reshape_318 + + # pd_op.unsqueeze: (1x24x144x144xf32) <- (24x144x144xf32, 1xi64) + unsqueeze_63 = paddle._C_ops.unsqueeze(transpose_141, full_int_array_0) + del transpose_141 + + # pd_op.add: (-1x24x144x144xf32) <- (-1x24x144x144xf32, 1x24x144x144xf32) + add_160 = paddle._C_ops.add(matmul_129, unsqueeze_63) + del matmul_129, unsqueeze_63 + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_10 = paddle._C_ops.floor_divide(slice_147, full_35) + del full_35 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_136 = [floor_divide_10, full_21, full_29, full_4, full_4] + del floor_divide_10, full_21 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_134 = paddle._C_ops.stack(combine_136, 0) + del combine_136 + + # pd_op.reshape: (-1x4x24x144x144xf32) <- (-1x24x144x144xf32, 5xi64) + reshape_319 = paddle._C_ops.reshape(add_160, stack_134) + del add_160, stack_134 + + # pd_op.unsqueeze: (4x1x144x144xf32) <- (4x144x144xf32, 1xi64) + unsqueeze_64 = paddle._C_ops.unsqueeze(where_21, full_int_array_1) + del where_21 + + # pd_op.unsqueeze: (1x4x1x144x144xf32) <- (4x1x144x144xf32, 1xi64) + unsqueeze_65 = paddle._C_ops.unsqueeze(unsqueeze_64, full_int_array_0) + del unsqueeze_64 + + # pd_op.add: (-1x4x24x144x144xf32) <- (-1x4x24x144x144xf32, 1x4x1x144x144xf32) + add_161 = paddle._C_ops.add(reshape_319, unsqueeze_65) + del reshape_319, unsqueeze_65 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_137 = [slice_147, full_29, full_4, full_4] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_135 = paddle._C_ops.stack(combine_137, 0) + del combine_137 + + # pd_op.reshape: (-1x24x144x144xf32) <- (-1x4x24x144x144xf32, 4xi64) + reshape_320 = paddle._C_ops.reshape(add_161, stack_135) + del add_161, stack_135 + + # pd_op.softmax: (-1x24x144x144xf32) <- (-1x24x144x144xf32) + softmax_21 = paddle._C_ops.softmax(reshape_320, -1) + del reshape_320 + + # pd_op.matmul: (-1x24x144x32xf32) <- (-1x24x144x144xf32, -1x24x144x32xf32) + matmul_130 = paddle._C_ops.matmul(softmax_21, slice_150, False, False) + del slice_150, softmax_21 + + # pd_op.transpose: (-1x144x24x32xf32) <- (-1x24x144x32xf32) + transpose_142 = paddle._C_ops.transpose(matmul_130, [0, 2, 1, 3]) + del matmul_130 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_138 = [slice_147, full_4, full_18] + del slice_147 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_136 = paddle._C_ops.stack(combine_138, 0) + del combine_138 + + # pd_op.reshape: (-1x144x768xf32) <- (-1x144x24x32xf32, 3xi64) + reshape_321 = paddle._C_ops.reshape(transpose_142, stack_136) + del stack_136, transpose_142 + + # pd_op.matmul: (-1x144x768xf32) <- (-1x144x768xf32, 768x768xf32) + matmul_131 = paddle._C_ops.matmul(reshape_321, parameter_38, False, False) + del parameter_38, reshape_321 + + # pd_op.add: (-1x144x768xf32) <- (-1x144x768xf32, 768xf32) + add_162 = paddle._C_ops.add(matmul_131, parameter_37) + del matmul_131, parameter_37 + + # pd_op.reshape: (-1x12x12x768xf32) <- (-1x144x768xf32, 4xi64) + reshape_322 = paddle._C_ops.reshape(add_162, full_int_array_38) + del add_162, full_int_array_38 + + # pd_op.reshape: (-1x2x2x12x12x768xf32) <- (-1x12x12x768xf32, 6xi64) + reshape_323 = paddle._C_ops.reshape(reshape_322, full_int_array_40) + del full_int_array_40, reshape_322 + + # pd_op.transpose: (-1x2x12x2x12x768xf32) <- (-1x2x2x12x12x768xf32) + transpose_143 = paddle._C_ops.transpose(reshape_323, [0, 1, 3, 2, 4, 5]) + del reshape_323 + + # pd_op.reshape: (-1x24x24x768xf32) <- (-1x2x12x2x12x768xf32, 4xi64) + reshape_324 = paddle._C_ops.reshape(transpose_143, full_int_array_41) + del full_int_array_41, transpose_143 + + # pd_op.roll: (-1x24x24x768xf32) <- (-1x24x24x768xf32, 2xi64) + roll_21 = paddle._C_ops.roll(reshape_324, full_int_array_29, [1, 2]) + del reshape_324 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_139 = [slice_144, full_31, full_18] + del full_31, slice_144 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_137 = paddle._C_ops.stack(combine_139, 0) + del combine_139 + + # pd_op.reshape: (-1x576x768xf32) <- (-1x24x24x768xf32, 3xi64) + reshape_325 = paddle._C_ops.reshape(roll_21, stack_137) + del roll_21, stack_137 + + # pd_op.add: (-1x576x768xf32) <- (-1x576x768xf32, -1x576x768xf32) + add_163 = paddle._C_ops.add(add_158, reshape_325) + del add_158, reshape_325 + + # pd_op.layer_norm: (-1x576x768xf32, -1x576xf32, -1x576xf32) <- (-1x576x768xf32, 768xf32, 768xf32) + layer_norm_138, layer_norm_139, layer_norm_140 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_163, parameter_36, parameter_35, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_35, parameter_36 + + # pd_op.matmul: (-1x576x3072xf32) <- (-1x576x768xf32, 768x3072xf32) + matmul_132 = paddle._C_ops.matmul(layer_norm_138, parameter_34, False, False) + del layer_norm_138, parameter_34 + + # pd_op.add: (-1x576x3072xf32) <- (-1x576x3072xf32, 3072xf32) + add_164 = paddle._C_ops.add(matmul_132, parameter_33) + del matmul_132, parameter_33 + + # pd_op.gelu: (-1x576x3072xf32) <- (-1x576x3072xf32) + gelu_21 = paddle._C_ops.gelu(add_164, False) + del add_164 + + # pd_op.matmul: (-1x576x768xf32) <- (-1x576x3072xf32, 3072x768xf32) + matmul_133 = paddle._C_ops.matmul(gelu_21, parameter_32, False, False) + del gelu_21, parameter_32 + + # pd_op.add: (-1x576x768xf32) <- (-1x576x768xf32, 768xf32) + add_165 = paddle._C_ops.add(matmul_133, parameter_31) + del matmul_133, parameter_31 + + # pd_op.add: (-1x576x768xf32) <- (-1x576x768xf32, -1x576x768xf32) + add_166 = paddle._C_ops.add(add_163, add_165) + del add_163, add_165 + + # pd_op.shape64: (3xi64) <- (-1x576x768xf32) + shape64_85 = paddle._C_ops.shape64(add_166) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_151 = paddle._C_ops.slice( + shape64_85, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_85 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_140 = [slice_151, full_29, full_29, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_138 = paddle._C_ops.stack(combine_140, 0) + del combine_140 + + # pd_op.reshape: (-1x24x24x768xf32) <- (-1x576x768xf32, 4xi64) + reshape_326 = paddle._C_ops.reshape(add_166, stack_138) + del add_166, stack_138 + + # pd_op.strided_slice: (-1x12x12x768xf32) <- (-1x24x24x768xf32, 2xi64, 2xi64, 2xi64) + strided_slice_8 = paddle._C_ops.strided_slice( + reshape_326, [1, 2], full_int_array_12, full_int_array_25, full_int_array_30 + ) + + # pd_op.strided_slice: (-1x12x12x768xf32) <- (-1x24x24x768xf32, 2xi64, 2xi64, 2xi64) + strided_slice_9 = paddle._C_ops.strided_slice( + reshape_326, [1, 2], full_int_array_31, full_int_array_25, full_int_array_30 + ) + del full_int_array_31 + + # pd_op.strided_slice: (-1x12x12x768xf32) <- (-1x24x24x768xf32, 2xi64, 2xi64, 2xi64) + strided_slice_10 = paddle._C_ops.strided_slice( + reshape_326, [1, 2], full_int_array_32, full_int_array_25, full_int_array_30 + ) + del full_int_array_32 + + # pd_op.strided_slice: (-1x12x12x768xf32) <- (-1x24x24x768xf32, 2xi64, 2xi64, 2xi64) + strided_slice_11 = paddle._C_ops.strided_slice( + reshape_326, [1, 2], full_int_array_14, full_int_array_25, full_int_array_30 + ) + del full_int_array_30 + + # pd_op.shape64: (4xi64) <- (-1x24x24x768xf32) + shape64_86 = paddle._C_ops.shape64(reshape_326) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_152 = paddle._C_ops.slice( + shape64_86, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_86 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_141 = [slice_152, full_29, full_29, full_18] + del full_18, full_29, slice_152 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_139 = paddle._C_ops.stack(combine_141, 0) + del combine_141 + + # pd_op.reshape: (-1x24x24x768xf32) <- (-1x24x24x768xf32, 4xi64) + reshape_327 = paddle._C_ops.reshape(reshape_326, stack_139) + del reshape_326, stack_139 + + # builtin.combine: ([-1x12x12x768xf32, -1x12x12x768xf32, -1x12x12x768xf32, -1x12x12x768xf32]) <- (-1x12x12x768xf32, -1x12x12x768xf32, -1x12x12x768xf32, -1x12x12x768xf32) + combine_142 = [ + strided_slice_8, + strided_slice_9, + strided_slice_10, + strided_slice_11, + ] + del strided_slice_10, strided_slice_11, strided_slice_8, strided_slice_9 + + # pd_op.concat: (-1x12x12x3072xf32) <- ([-1x12x12x768xf32, -1x12x12x768xf32, -1x12x12x768xf32, -1x12x12x768xf32], 1xi32) + concat_2 = paddle._C_ops.concat(combine_142, full_16) + del combine_142, full_16 + + # pd_op.full: (xi64) <- () + full_44 = paddle._C_ops.full( + [], float("3072"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_143 = [slice_151, full_17, full_44] + del full_17, full_44, slice_151 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_140 = paddle._C_ops.stack(combine_143, 0) + del combine_143 + + # pd_op.reshape: (-1x-1x3072xf32) <- (-1x12x12x3072xf32, 3xi64) + reshape_328 = paddle._C_ops.reshape(concat_2, stack_140) + del concat_2, stack_140 + + # pd_op.layer_norm: (-1x-1x3072xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x3072xf32, 3072xf32, 3072xf32) + layer_norm_141, layer_norm_142, layer_norm_143 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + reshape_328, parameter_30, parameter_29, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_29, parameter_30, reshape_328 + + # pd_op.matmul: (-1x-1x1536xf32) <- (-1x-1x3072xf32, 3072x1536xf32) + matmul_134 = paddle._C_ops.matmul(layer_norm_141, parameter_28, False, False) + del layer_norm_141, parameter_28 + + # pd_op.shape64: (3xi64) <- (-1x-1x1536xf32) + shape64_87 = paddle._C_ops.shape64(matmul_134) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_153 = paddle._C_ops.slice( + shape64_87, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_87 + + # pd_op.shape64: (3xi64) <- (-1x-1x1536xf32) + shape64_88 = paddle._C_ops.shape64(matmul_134) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_154 = paddle._C_ops.slice( + shape64_88, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + del shape64_88 + + # pd_op.layer_norm: (-1x-1x1536xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x1536xf32, 1536xf32, 1536xf32) + layer_norm_144, layer_norm_145, layer_norm_146 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + matmul_134, parameter_27, parameter_26, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_26, parameter_27 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_144 = [slice_153, full_3, full_3, full_28] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_141 = paddle._C_ops.stack(combine_144, 0) + del combine_144 + + # pd_op.reshape: (-1x12x12x1536xf32) <- (-1x-1x1536xf32, 4xi64) + reshape_329 = paddle._C_ops.reshape(layer_norm_144, stack_141) + del layer_norm_144, stack_141 + + # pd_op.shape64: (4xi64) <- (-1x12x12x1536xf32) + shape64_89 = paddle._C_ops.shape64(reshape_329) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_155 = paddle._C_ops.slice( + shape64_89, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_89 + + # pd_op.full: (xi64) <- () + full_45 = paddle._C_ops.full( + [], float("1"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_145 = [slice_155, full_45, full_3, full_45, full_3, full_28] + del slice_155 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_142 = paddle._C_ops.stack(combine_145, 0) + del combine_145 + + # pd_op.reshape: (-1x1x12x1x12x1536xf32) <- (-1x12x12x1536xf32, 6xi64) + reshape_330 = paddle._C_ops.reshape(reshape_329, stack_142) + del reshape_329, stack_142 + + # pd_op.transpose: (-1x1x1x12x12x1536xf32) <- (-1x1x12x1x12x1536xf32) + transpose_144 = paddle._C_ops.transpose(reshape_330, [0, 1, 3, 2, 4, 5]) + del reshape_330 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_43 = [-1, 12, 12, 1536] + + # pd_op.reshape: (-1x12x12x1536xf32) <- (-1x1x1x12x12x1536xf32, 4xi64) + reshape_331 = paddle._C_ops.reshape(transpose_144, full_int_array_43) + del transpose_144 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_44 = [-1, 144, 1536] + + # pd_op.reshape: (-1x144x1536xf32) <- (-1x12x12x1536xf32, 3xi64) + reshape_332 = paddle._C_ops.reshape(reshape_331, full_int_array_44) + del reshape_331 + + # pd_op.shape64: (3xi64) <- (-1x144x1536xf32) + shape64_90 = paddle._C_ops.shape64(reshape_332) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_156 = paddle._C_ops.slice( + shape64_90, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_90 + + # pd_op.matmul: (-1x144x4608xf32) <- (-1x144x1536xf32, 1536x4608xf32) + matmul_135 = paddle._C_ops.matmul(reshape_332, parameter_25, False, False) + del parameter_25, reshape_332 + + # pd_op.add: (-1x144x4608xf32) <- (-1x144x4608xf32, 4608xf32) + add_167 = paddle._C_ops.add(matmul_135, parameter_24) + del matmul_135, parameter_24 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_146 = [slice_156, full_4, full_5, full_19, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_143 = paddle._C_ops.stack(combine_146, 0) + del combine_146 + + # pd_op.reshape: (-1x144x3x48x32xf32) <- (-1x144x4608xf32, 5xi64) + reshape_333 = paddle._C_ops.reshape(add_167, stack_143) + del add_167, stack_143 + + # pd_op.transpose: (3x-1x48x144x32xf32) <- (-1x144x3x48x32xf32) + transpose_145 = paddle._C_ops.transpose(reshape_333, [2, 0, 3, 1, 4]) + del reshape_333 + + # pd_op.slice: (-1x48x144x32xf32) <- (3x-1x48x144x32xf32, 1xi64, 1xi64) + slice_157 = paddle._C_ops.slice( + transpose_145, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x48x144x32xf32) <- (3x-1x48x144x32xf32, 1xi64, 1xi64) + slice_158 = paddle._C_ops.slice( + transpose_145, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x48x144x32xf32) <- (3x-1x48x144x32xf32, 1xi64, 1xi64) + slice_159 = paddle._C_ops.slice( + transpose_145, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_145 + + # pd_op.scale: (-1x48x144x32xf32) <- (-1x48x144x32xf32, 1xf32) + scale_22 = paddle._C_ops.scale(slice_157, full_8, float("0"), True) + del slice_157 + + # pd_op.transpose: (-1x48x32x144xf32) <- (-1x48x144x32xf32) + transpose_146 = paddle._C_ops.transpose(slice_158, [0, 1, 3, 2]) + del slice_158 + + # pd_op.matmul: (-1x48x144x144xf32) <- (-1x48x144x32xf32, -1x48x32x144xf32) + matmul_136 = paddle._C_ops.matmul(scale_22, transpose_146, False, False) + del scale_22, transpose_146 + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_334 = paddle._C_ops.reshape(data_45, full_int_array_7) + del data_45 + + # pd_op.index_select: (20736x48xf32) <- (529x48xf32, 20736xi64) + index_select_22 = paddle._C_ops.index_select(data_46, reshape_334, 0) + del data_46, reshape_334 + + # pd_op.reshape: (144x144x48xf32) <- (20736x48xf32, 3xi64) + reshape_335 = paddle._C_ops.reshape(index_select_22, full_int_array_8) + del index_select_22 + + # pd_op.transpose: (48x144x144xf32) <- (144x144x48xf32) + transpose_147 = paddle._C_ops.transpose(reshape_335, [2, 0, 1]) + del reshape_335 + + # pd_op.unsqueeze: (1x48x144x144xf32) <- (48x144x144xf32, 1xi64) + unsqueeze_66 = paddle._C_ops.unsqueeze(transpose_147, full_int_array_0) + del transpose_147 + + # pd_op.add: (-1x48x144x144xf32) <- (-1x48x144x144xf32, 1x48x144x144xf32) + add_168 = paddle._C_ops.add(matmul_136, unsqueeze_66) + del matmul_136, unsqueeze_66 + + # pd_op.softmax: (-1x48x144x144xf32) <- (-1x48x144x144xf32) + softmax_22 = paddle._C_ops.softmax(add_168, -1) + del add_168 + + # pd_op.matmul: (-1x48x144x32xf32) <- (-1x48x144x144xf32, -1x48x144x32xf32) + matmul_137 = paddle._C_ops.matmul(softmax_22, slice_159, False, False) + del slice_159, softmax_22 + + # pd_op.transpose: (-1x144x48x32xf32) <- (-1x48x144x32xf32) + transpose_148 = paddle._C_ops.transpose(matmul_137, [0, 2, 1, 3]) + del matmul_137 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_147 = [slice_156, full_4, full_28] + del slice_156 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_144 = paddle._C_ops.stack(combine_147, 0) + del combine_147 + + # pd_op.reshape: (-1x144x1536xf32) <- (-1x144x48x32xf32, 3xi64) + reshape_336 = paddle._C_ops.reshape(transpose_148, stack_144) + del stack_144, transpose_148 + + # pd_op.matmul: (-1x144x1536xf32) <- (-1x144x1536xf32, 1536x1536xf32) + matmul_138 = paddle._C_ops.matmul(reshape_336, parameter_23, False, False) + del parameter_23, reshape_336 + + # pd_op.add: (-1x144x1536xf32) <- (-1x144x1536xf32, 1536xf32) + add_169 = paddle._C_ops.add(matmul_138, parameter_22) + del matmul_138, parameter_22 + + # pd_op.reshape: (-1x12x12x1536xf32) <- (-1x144x1536xf32, 4xi64) + reshape_337 = paddle._C_ops.reshape(add_169, full_int_array_43) + del add_169 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_45 = [-1, 1, 1, 12, 12, 1536] + + # pd_op.reshape: (-1x1x1x12x12x1536xf32) <- (-1x12x12x1536xf32, 6xi64) + reshape_338 = paddle._C_ops.reshape(reshape_337, full_int_array_45) + del reshape_337 + + # pd_op.transpose: (-1x1x12x1x12x1536xf32) <- (-1x1x1x12x12x1536xf32) + transpose_149 = paddle._C_ops.transpose(reshape_338, [0, 1, 3, 2, 4, 5]) + del reshape_338 + + # pd_op.reshape: (-1x12x12x1536xf32) <- (-1x1x12x1x12x1536xf32, 4xi64) + reshape_339 = paddle._C_ops.reshape(transpose_149, full_int_array_43) + del transpose_149 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_148 = [slice_153, full_4, full_28] + del slice_153 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_145 = paddle._C_ops.stack(combine_148, 0) + del combine_148 + + # pd_op.reshape: (-1x144x1536xf32) <- (-1x12x12x1536xf32, 3xi64) + reshape_340 = paddle._C_ops.reshape(reshape_339, stack_145) + del reshape_339, stack_145 + + # pd_op.add: (-1x144x1536xf32) <- (-1x-1x1536xf32, -1x144x1536xf32) + add_170 = paddle._C_ops.add(matmul_134, reshape_340) + del matmul_134, reshape_340 + + # pd_op.layer_norm: (-1x144x1536xf32, -1x144xf32, -1x144xf32) <- (-1x144x1536xf32, 1536xf32, 1536xf32) + layer_norm_147, layer_norm_148, layer_norm_149 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_170, parameter_21, parameter_20, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_20, parameter_21 + + # pd_op.matmul: (-1x144x6144xf32) <- (-1x144x1536xf32, 1536x6144xf32) + matmul_139 = paddle._C_ops.matmul(layer_norm_147, parameter_19, False, False) + del layer_norm_147, parameter_19 + + # pd_op.add: (-1x144x6144xf32) <- (-1x144x6144xf32, 6144xf32) + add_171 = paddle._C_ops.add(matmul_139, parameter_18) + del matmul_139, parameter_18 + + # pd_op.gelu: (-1x144x6144xf32) <- (-1x144x6144xf32) + gelu_22 = paddle._C_ops.gelu(add_171, False) + del add_171 + + # pd_op.matmul: (-1x144x1536xf32) <- (-1x144x6144xf32, 6144x1536xf32) + matmul_140 = paddle._C_ops.matmul(gelu_22, parameter_17, False, False) + del gelu_22, parameter_17 + + # pd_op.add: (-1x144x1536xf32) <- (-1x144x1536xf32, 1536xf32) + add_172 = paddle._C_ops.add(matmul_140, parameter_16) + del matmul_140, parameter_16 + + # pd_op.add: (-1x144x1536xf32) <- (-1x144x1536xf32, -1x144x1536xf32) + add_173 = paddle._C_ops.add(add_170, add_172) + del add_170, add_172 + + # pd_op.shape64: (3xi64) <- (-1x144x1536xf32) + shape64_91 = paddle._C_ops.shape64(add_173) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_160 = paddle._C_ops.slice( + shape64_91, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_91 + + # pd_op.layer_norm: (-1x144x1536xf32, -1x144xf32, -1x144xf32) <- (-1x144x1536xf32, 1536xf32, 1536xf32) + layer_norm_150, layer_norm_151, layer_norm_152 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_173, parameter_15, parameter_14, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_14, parameter_15 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_149 = [slice_160, full_3, full_3, full_28] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_146 = paddle._C_ops.stack(combine_149, 0) + del combine_149 + + # pd_op.reshape: (-1x12x12x1536xf32) <- (-1x144x1536xf32, 4xi64) + reshape_341 = paddle._C_ops.reshape(layer_norm_150, stack_146) + del layer_norm_150, stack_146 + + # pd_op.shape64: (4xi64) <- (-1x12x12x1536xf32) + shape64_92 = paddle._C_ops.shape64(reshape_341) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_161 = paddle._C_ops.slice( + shape64_92, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_92 + + # pd_op.roll: (-1x12x12x1536xf32) <- (-1x12x12x1536xf32, 2xi64) + roll_22 = paddle._C_ops.roll(reshape_341, full_int_array_11, [1, 2]) + del reshape_341 + + # pd_op.shape64: (4xi64) <- (-1x12x12x1536xf32) + shape64_93 = paddle._C_ops.shape64(roll_22) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_162 = paddle._C_ops.slice( + shape64_93, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_93 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_150 = [slice_162, full_45, full_3, full_45, full_3, full_28] + del full_3, slice_162 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_147 = paddle._C_ops.stack(combine_150, 0) + del combine_150 + + # pd_op.reshape: (-1x1x12x1x12x1536xf32) <- (-1x12x12x1536xf32, 6xi64) + reshape_342 = paddle._C_ops.reshape(roll_22, stack_147) + del roll_22, stack_147 + + # pd_op.transpose: (-1x1x1x12x12x1536xf32) <- (-1x1x12x1x12x1536xf32) + transpose_150 = paddle._C_ops.transpose(reshape_342, [0, 1, 3, 2, 4, 5]) + del reshape_342 + + # pd_op.reshape: (-1x12x12x1536xf32) <- (-1x1x1x12x12x1536xf32, 4xi64) + reshape_343 = paddle._C_ops.reshape(transpose_150, full_int_array_43) + del transpose_150 + + # pd_op.reshape: (-1x144x1536xf32) <- (-1x12x12x1536xf32, 3xi64) + reshape_344 = paddle._C_ops.reshape(reshape_343, full_int_array_44) + del full_int_array_44, reshape_343 + + # pd_op.full: (1x12x12x1xf32) <- () + full_46 = paddle._C_ops.full( + [1, 12, 12, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x12x12x1xf32) <- (1x12x12x1xf32, 2xi64, 2xi64, 2xi64) + set_value__100 = paddle._C_ops.set_value_( + full_46, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_46, full_int_array_12 + + # pd_op.set_value_: (1x12x12x1xf32) <- (1x12x12x1xf32, 2xi64, 2xi64, 2xi64) + set_value__101 = paddle._C_ops.set_value_( + set_value__100, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del full_int_array_15, set_value__100 + + # pd_op.set_value_: (1x12x12x1xf32) <- (1x12x12x1xf32, 2xi64, 2xi64, 2xi64) + set_value__102 = paddle._C_ops.set_value_( + set_value__101, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del full_int_array_17, full_int_array_18, set_value__101 + + # pd_op.set_value_: (1x12x12x1xf32) <- (1x12x12x1xf32, 2xi64, 2xi64, 2xi64) + set_value__103 = paddle._C_ops.set_value_( + set_value__102, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del full_int_array_19, set_value__102 + + # pd_op.set_value_: (1x12x12x1xf32) <- (1x12x12x1xf32, 2xi64, 2xi64, 2xi64) + set_value__104 = paddle._C_ops.set_value_( + set_value__103, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del full_int_array_13, set_value__103 + + # pd_op.set_value_: (1x12x12x1xf32) <- (1x12x12x1xf32, 2xi64, 2xi64, 2xi64) + set_value__105 = paddle._C_ops.set_value_( + set_value__104, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del full_int_array_16, full_int_array_21, set_value__104 + + # pd_op.set_value_: (1x12x12x1xf32) <- (1x12x12x1xf32, 2xi64, 2xi64, 2xi64) + set_value__106 = paddle._C_ops.set_value_( + set_value__105, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del full_int_array_22, full_int_array_23, set_value__105 + + # pd_op.set_value_: (1x12x12x1xf32) <- (1x12x12x1xf32, 2xi64, 2xi64, 2xi64) + set_value__107 = paddle._C_ops.set_value_( + set_value__106, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del full_int_array_20, full_int_array_24, set_value__106 + + # pd_op.set_value_: (1x12x12x1xf32) <- (1x12x12x1xf32, 2xi64, 2xi64, 2xi64) + set_value__11 = paddle._C_ops.set_value_( + set_value__107, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del full_int_array_11, full_int_array_25, set_value__107 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_46 = [1, 1, 12, 1, 12, 1] + + # pd_op.reshape: (1x1x12x1x12x1xf32) <- (1x12x12x1xf32, 6xi64) + reshape_345 = paddle._C_ops.reshape(set_value__11, full_int_array_46) + del full_int_array_46 + + # pd_op.transpose: (1x1x1x12x12x1xf32) <- (1x1x12x1x12x1xf32) + transpose_151 = paddle._C_ops.transpose(reshape_345, [0, 1, 3, 2, 4, 5]) + del reshape_345 + + # pd_op.reshape: (1x12x12x1xf32) <- (1x1x1x12x12x1xf32, 4xi64) + reshape_346 = paddle._C_ops.reshape(transpose_151, full_int_array_27) + del full_int_array_27, transpose_151 + + # pd_op.reshape: (1x144xf32) <- (1x12x12x1xf32, 2xi64) + reshape_347 = paddle._C_ops.reshape(reshape_346, full_int_array_28) + del full_int_array_28, reshape_346 + + # pd_op.unsqueeze: (1x1x144xf32) <- (1x144xf32, 1xi64) + unsqueeze_67 = paddle._C_ops.unsqueeze(reshape_347, full_int_array_1) + + # pd_op.unsqueeze: (1x144x1xf32) <- (1x144xf32, 1xi64) + unsqueeze_68 = paddle._C_ops.unsqueeze(reshape_347, full_int_array_5) + del reshape_347 + + # pd_op.subtract: (1x144x144xf32) <- (1x1x144xf32, 1x144x1xf32) + subtract_11 = paddle._C_ops.subtract(unsqueeze_67, unsqueeze_68) + del unsqueeze_67, unsqueeze_68 + + # pd_op.not_equal: (1x144x144xb) <- (1x144x144xf32, xf32) + not_equal_11 = paddle._C_ops.not_equal(subtract_11, full_11) + + # pd_op.full: (1x144x144xf32) <- () + full_47 = paddle._C_ops.full( + [1, 144, 144], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (1x144x144xf32) <- (1x144x144xb, 1x144x144xf32, 1x144x144xf32) + where_22 = paddle._C_ops.where(not_equal_11, full_47, subtract_11) + del full_47, not_equal_11, subtract_11 + + # pd_op.equal: (1x144x144xb) <- (1x144x144xf32, xf32) + equal_11 = paddle._C_ops.equal(where_22, full_11) + del full_11 + + # pd_op.full: (1x144x144xf32) <- () + full_48 = paddle._C_ops.full( + [1, 144, 144], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (1x144x144xf32) <- (1x144x144xb, 1x144x144xf32, 1x144x144xf32) + where_23 = paddle._C_ops.where(equal_11, full_48, where_22) + del equal_11, full_48, where_22 + + # pd_op.shape64: (3xi64) <- (-1x144x1536xf32) + shape64_94 = paddle._C_ops.shape64(reshape_344) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_163 = paddle._C_ops.slice( + shape64_94, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_94 + + # pd_op.matmul: (-1x144x4608xf32) <- (-1x144x1536xf32, 1536x4608xf32) + matmul_141 = paddle._C_ops.matmul(reshape_344, parameter_13, False, False) + del parameter_13, reshape_344 + + # pd_op.add: (-1x144x4608xf32) <- (-1x144x4608xf32, 4608xf32) + add_174 = paddle._C_ops.add(matmul_141, parameter_12) + del matmul_141, parameter_12 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_151 = [slice_163, full_4, full_5, full_19, full_7] + del full_5, full_7 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_148 = paddle._C_ops.stack(combine_151, 0) + del combine_151 + + # pd_op.reshape: (-1x144x3x48x32xf32) <- (-1x144x4608xf32, 5xi64) + reshape_348 = paddle._C_ops.reshape(add_174, stack_148) + del add_174, stack_148 + + # pd_op.transpose: (3x-1x48x144x32xf32) <- (-1x144x3x48x32xf32) + transpose_152 = paddle._C_ops.transpose(reshape_348, [2, 0, 3, 1, 4]) + del reshape_348 + + # pd_op.slice: (-1x48x144x32xf32) <- (3x-1x48x144x32xf32, 1xi64, 1xi64) + slice_164 = paddle._C_ops.slice( + transpose_152, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x48x144x32xf32) <- (3x-1x48x144x32xf32, 1xi64, 1xi64) + slice_165 = paddle._C_ops.slice( + transpose_152, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x48x144x32xf32) <- (3x-1x48x144x32xf32, 1xi64, 1xi64) + slice_166 = paddle._C_ops.slice( + transpose_152, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del full_int_array_6, transpose_152 + + # pd_op.scale: (-1x48x144x32xf32) <- (-1x48x144x32xf32, 1xf32) + scale_23 = paddle._C_ops.scale(slice_164, full_8, float("0"), True) + del full_8, slice_164 + + # pd_op.transpose: (-1x48x32x144xf32) <- (-1x48x144x32xf32) + transpose_153 = paddle._C_ops.transpose(slice_165, [0, 1, 3, 2]) + del slice_165 + + # pd_op.matmul: (-1x48x144x144xf32) <- (-1x48x144x32xf32, -1x48x32x144xf32) + matmul_142 = paddle._C_ops.matmul(scale_23, transpose_153, False, False) + del scale_23, transpose_153 + + # pd_op.reshape: (20736xi64) <- (144x144xi64, 1xi64) + reshape_349 = paddle._C_ops.reshape(data_47, full_int_array_7) + del data_47, full_int_array_7 + + # pd_op.index_select: (20736x48xf32) <- (529x48xf32, 20736xi64) + index_select_23 = paddle._C_ops.index_select(data_48, reshape_349, 0) + del data_48, reshape_349 + + # pd_op.reshape: (144x144x48xf32) <- (20736x48xf32, 3xi64) + reshape_350 = paddle._C_ops.reshape(index_select_23, full_int_array_8) + del full_int_array_8, index_select_23 + + # pd_op.transpose: (48x144x144xf32) <- (144x144x48xf32) + transpose_154 = paddle._C_ops.transpose(reshape_350, [2, 0, 1]) + del reshape_350 + + # pd_op.unsqueeze: (1x48x144x144xf32) <- (48x144x144xf32, 1xi64) + unsqueeze_69 = paddle._C_ops.unsqueeze(transpose_154, full_int_array_0) + del transpose_154 + + # pd_op.add: (-1x48x144x144xf32) <- (-1x48x144x144xf32, 1x48x144x144xf32) + add_175 = paddle._C_ops.add(matmul_142, unsqueeze_69) + del matmul_142, unsqueeze_69 + + # pd_op.full: (xi64) <- () + full_49 = paddle._C_ops.full( + [], float("1"), paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_11 = paddle._C_ops.floor_divide(slice_163, full_49) + del full_49 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_152 = [floor_divide_11, full_45, full_19, full_4, full_4] + del floor_divide_11, full_45 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_149 = paddle._C_ops.stack(combine_152, 0) + del combine_152 + + # pd_op.reshape: (-1x1x48x144x144xf32) <- (-1x48x144x144xf32, 5xi64) + reshape_351 = paddle._C_ops.reshape(add_175, stack_149) + del add_175, stack_149 + + # pd_op.unsqueeze: (1x1x144x144xf32) <- (1x144x144xf32, 1xi64) + unsqueeze_70 = paddle._C_ops.unsqueeze(where_23, full_int_array_1) + del full_int_array_1, where_23 + + # pd_op.unsqueeze: (1x1x1x144x144xf32) <- (1x1x144x144xf32, 1xi64) + unsqueeze_71 = paddle._C_ops.unsqueeze(unsqueeze_70, full_int_array_0) + del full_int_array_0, unsqueeze_70 + + # pd_op.add: (-1x1x48x144x144xf32) <- (-1x1x48x144x144xf32, 1x1x1x144x144xf32) + add_176 = paddle._C_ops.add(reshape_351, unsqueeze_71) + del reshape_351, unsqueeze_71 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_153 = [slice_163, full_19, full_4, full_4] + del full_19 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_150 = paddle._C_ops.stack(combine_153, 0) + del combine_153 + + # pd_op.reshape: (-1x48x144x144xf32) <- (-1x1x48x144x144xf32, 4xi64) + reshape_352 = paddle._C_ops.reshape(add_176, stack_150) + del add_176, stack_150 + + # pd_op.softmax: (-1x48x144x144xf32) <- (-1x48x144x144xf32) + softmax_23 = paddle._C_ops.softmax(reshape_352, -1) + del reshape_352 + + # pd_op.matmul: (-1x48x144x32xf32) <- (-1x48x144x144xf32, -1x48x144x32xf32) + matmul_143 = paddle._C_ops.matmul(softmax_23, slice_166, False, False) + del slice_166, softmax_23 + + # pd_op.transpose: (-1x144x48x32xf32) <- (-1x48x144x32xf32) + transpose_155 = paddle._C_ops.transpose(matmul_143, [0, 2, 1, 3]) + del matmul_143 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_154 = [slice_163, full_4, full_28] + del slice_163 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_151 = paddle._C_ops.stack(combine_154, 0) + del combine_154 + + # pd_op.reshape: (-1x144x1536xf32) <- (-1x144x48x32xf32, 3xi64) + reshape_353 = paddle._C_ops.reshape(transpose_155, stack_151) + del stack_151, transpose_155 + + # pd_op.matmul: (-1x144x1536xf32) <- (-1x144x1536xf32, 1536x1536xf32) + matmul_144 = paddle._C_ops.matmul(reshape_353, parameter_11, False, False) + del parameter_11, reshape_353 + + # pd_op.add: (-1x144x1536xf32) <- (-1x144x1536xf32, 1536xf32) + add_177 = paddle._C_ops.add(matmul_144, parameter_10) + del matmul_144, parameter_10 + + # pd_op.reshape: (-1x12x12x1536xf32) <- (-1x144x1536xf32, 4xi64) + reshape_354 = paddle._C_ops.reshape(add_177, full_int_array_43) + del add_177 + + # pd_op.reshape: (-1x1x1x12x12x1536xf32) <- (-1x12x12x1536xf32, 6xi64) + reshape_355 = paddle._C_ops.reshape(reshape_354, full_int_array_45) + del full_int_array_45, reshape_354 + + # pd_op.transpose: (-1x1x12x1x12x1536xf32) <- (-1x1x1x12x12x1536xf32) + transpose_156 = paddle._C_ops.transpose(reshape_355, [0, 1, 3, 2, 4, 5]) + del reshape_355 + + # pd_op.reshape: (-1x12x12x1536xf32) <- (-1x1x12x1x12x1536xf32, 4xi64) + reshape_356 = paddle._C_ops.reshape(transpose_156, full_int_array_43) + del full_int_array_43, transpose_156 + + # pd_op.roll: (-1x12x12x1536xf32) <- (-1x12x12x1536xf32, 2xi64) + roll_23 = paddle._C_ops.roll(reshape_356, full_int_array_29, [1, 2]) + del full_int_array_29, reshape_356 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_155 = [slice_160, full_4, full_28] + del full_28, full_4, slice_160 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_152 = paddle._C_ops.stack(combine_155, 0) + del combine_155 + + # pd_op.reshape: (-1x144x1536xf32) <- (-1x12x12x1536xf32, 3xi64) + reshape_357 = paddle._C_ops.reshape(roll_23, stack_152) + del roll_23, stack_152 + + # pd_op.add: (-1x144x1536xf32) <- (-1x144x1536xf32, -1x144x1536xf32) + add_178 = paddle._C_ops.add(add_173, reshape_357) + del add_173, reshape_357 + + # pd_op.layer_norm: (-1x144x1536xf32, -1x144xf32, -1x144xf32) <- (-1x144x1536xf32, 1536xf32, 1536xf32) + layer_norm_153, layer_norm_154, layer_norm_155 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_178, parameter_9, parameter_8, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_8, parameter_9 + + # pd_op.matmul: (-1x144x6144xf32) <- (-1x144x1536xf32, 1536x6144xf32) + matmul_145 = paddle._C_ops.matmul(layer_norm_153, parameter_7, False, False) + del layer_norm_153, parameter_7 + + # pd_op.add: (-1x144x6144xf32) <- (-1x144x6144xf32, 6144xf32) + add_179 = paddle._C_ops.add(matmul_145, parameter_6) + del matmul_145, parameter_6 + + # pd_op.gelu: (-1x144x6144xf32) <- (-1x144x6144xf32) + gelu_23 = paddle._C_ops.gelu(add_179, False) + del add_179 + + # pd_op.matmul: (-1x144x1536xf32) <- (-1x144x6144xf32, 6144x1536xf32) + matmul_146 = paddle._C_ops.matmul(gelu_23, parameter_5, False, False) + del gelu_23, parameter_5 + + # pd_op.add: (-1x144x1536xf32) <- (-1x144x1536xf32, 1536xf32) + add_180 = paddle._C_ops.add(matmul_146, parameter_4) + del matmul_146, parameter_4 + + # pd_op.add: (-1x144x1536xf32) <- (-1x144x1536xf32, -1x144x1536xf32) + add_181 = paddle._C_ops.add(add_178, add_180) + del add_178, add_180 + + # pd_op.layer_norm: (-1x144x1536xf32, -1x144xf32, -1x144xf32) <- (-1x144x1536xf32, 1536xf32, 1536xf32) + layer_norm_156, layer_norm_157, layer_norm_158 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_181, parameter_3, parameter_2, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_181, parameter_2, parameter_3 + + # pd_op.transpose: (-1x1536x144xf32) <- (-1x144x1536xf32) + transpose_157 = paddle._C_ops.transpose(layer_norm_156, [0, 2, 1]) + del layer_norm_156 + + # pd_op.unsqueeze: (-1x1536x1x144xf32) <- (-1x1536x144xf32, 1xi64) + unsqueeze_72 = paddle._C_ops.unsqueeze(transpose_157, full_int_array_5) + del transpose_157 + + # pd_op.pool2d: (-1x1536x1x1xf32) <- (-1x1536x1x144xf32, 2xi64) + pool2d_0 = paddle._C_ops.pool2d( + unsqueeze_72, + full_int_array_14, + [1, 1], + [0, 0], + False, + True, + "NCHW", + "avg", + False, + True, + "EXPLICIT", + ) + del full_int_array_14, unsqueeze_72 + + # pd_op.squeeze: (-1x1536x1xf32) <- (-1x1536x1x1xf32, 1xi64) + squeeze_0 = paddle._C_ops.squeeze(pool2d_0, full_int_array_5) + del full_int_array_5, pool2d_0 + + # pd_op.flatten: (-1x1536xf32) <- (-1x1536x1xf32) + flatten_1 = paddle._C_ops.flatten(squeeze_0, 1, 2) + del squeeze_0 + + # pd_op.matmul: (-1x102xf32) <- (-1x1536xf32, 1536x102xf32) + matmul_147 = paddle._C_ops.matmul(flatten_1, parameter_1, False, False) + del flatten_1, parameter_1 + + # pd_op.add: (-1x102xf32) <- (-1x102xf32, 102xf32) + add_0 = paddle._C_ops.add(matmul_147, parameter_0) + del matmul_147, parameter_0 + + return ( + add_0, + set_value__0, + set_value__1, + set_value__2, + set_value__3, + set_value__4, + set_value__5, + set_value__6, + set_value__7, + set_value__8, + set_value__9, + set_value__10, + set_value__11, + ) diff --git a/paddle_samples/vision-model/SwinTransformer_large_patch4_window12_384/subgraph_1/weight_meta.py b/paddle_samples/vision-model/SwinTransformer_large_patch4_window12_384/subgraph_1/weight_meta.py new file mode 100644 index 00000000..88a6a1a4 --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_large_patch4_window12_384/subgraph_1/weight_meta.py @@ -0,0 +1,2743 @@ +class Program_weight_tensor_parameter_0: + name = "parameter_0" + shape = [102] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_1: + name = "parameter_1" + shape = [1536, 102] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_2: + name = "parameter_2" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_3: + name = "parameter_3" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_4: + name = "parameter_4" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_5: + name = "parameter_5" + shape = [6144, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_6: + name = "parameter_6" + shape = [6144] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_7: + name = "parameter_7" + shape = [1536, 6144] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_8: + name = "parameter_8" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_9: + name = "parameter_9" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_10: + name = "parameter_10" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_11: + name = "parameter_11" + shape = [1536, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_12: + name = "parameter_12" + shape = [4608] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_13: + name = "parameter_13" + shape = [1536, 4608] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_14: + name = "parameter_14" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_15: + name = "parameter_15" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_16: + name = "parameter_16" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_17: + name = "parameter_17" + shape = [6144, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_18: + name = "parameter_18" + shape = [6144] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_19: + name = "parameter_19" + shape = [1536, 6144] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_20: + name = "parameter_20" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_21: + name = "parameter_21" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_22: + name = "parameter_22" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_23: + name = "parameter_23" + shape = [1536, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_24: + name = "parameter_24" + shape = [4608] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_25: + name = "parameter_25" + shape = [1536, 4608] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_26: + name = "parameter_26" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_27: + name = "parameter_27" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_28: + name = "parameter_28" + shape = [3072, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_29: + name = "parameter_29" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_30: + name = "parameter_30" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_31: + name = "parameter_31" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_32: + name = "parameter_32" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_33: + name = "parameter_33" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_34: + name = "parameter_34" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_35: + name = "parameter_35" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_36: + name = "parameter_36" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_37: + name = "parameter_37" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_38: + name = "parameter_38" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_39: + name = "parameter_39" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_40: + name = "parameter_40" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_41: + name = "parameter_41" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_42: + name = "parameter_42" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_43: + name = "parameter_43" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_44: + name = "parameter_44" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_45: + name = "parameter_45" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_46: + name = "parameter_46" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_47: + name = "parameter_47" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_48: + name = "parameter_48" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_49: + name = "parameter_49" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_50: + name = "parameter_50" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_51: + name = "parameter_51" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_52: + name = "parameter_52" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_53: + name = "parameter_53" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_54: + name = "parameter_54" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_55: + name = "parameter_55" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_56: + name = "parameter_56" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_57: + name = "parameter_57" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_58: + name = "parameter_58" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_59: + name = "parameter_59" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_60: + name = "parameter_60" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_61: + name = "parameter_61" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_62: + name = "parameter_62" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_63: + name = "parameter_63" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_64: + name = "parameter_64" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_65: + name = "parameter_65" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_66: + name = "parameter_66" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_67: + name = "parameter_67" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_68: + name = "parameter_68" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_69: + name = "parameter_69" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_70: + name = "parameter_70" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_71: + name = "parameter_71" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_72: + name = "parameter_72" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_73: + name = "parameter_73" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_74: + name = "parameter_74" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_75: + name = "parameter_75" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_76: + name = "parameter_76" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_77: + name = "parameter_77" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_78: + name = "parameter_78" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_79: + name = "parameter_79" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_80: + name = "parameter_80" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_81: + name = "parameter_81" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_82: + name = "parameter_82" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_83: + name = "parameter_83" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_84: + name = "parameter_84" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_85: + name = "parameter_85" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_86: + name = "parameter_86" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_87: + name = "parameter_87" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_88: + name = "parameter_88" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_89: + name = "parameter_89" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_90: + name = "parameter_90" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_91: + name = "parameter_91" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_92: + name = "parameter_92" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_93: + name = "parameter_93" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_94: + name = "parameter_94" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_95: + name = "parameter_95" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_96: + name = "parameter_96" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_97: + name = "parameter_97" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_98: + name = "parameter_98" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_99: + name = "parameter_99" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_100: + name = "parameter_100" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_101: + name = "parameter_101" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_102: + name = "parameter_102" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_103: + name = "parameter_103" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_104: + name = "parameter_104" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_105: + name = "parameter_105" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_106: + name = "parameter_106" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_107: + name = "parameter_107" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_108: + name = "parameter_108" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_109: + name = "parameter_109" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_110: + name = "parameter_110" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_111: + name = "parameter_111" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_112: + name = "parameter_112" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_113: + name = "parameter_113" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_114: + name = "parameter_114" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_115: + name = "parameter_115" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_116: + name = "parameter_116" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_117: + name = "parameter_117" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_118: + name = "parameter_118" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_119: + name = "parameter_119" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_120: + name = "parameter_120" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_121: + name = "parameter_121" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_122: + name = "parameter_122" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_123: + name = "parameter_123" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_124: + name = "parameter_124" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_125: + name = "parameter_125" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_126: + name = "parameter_126" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_127: + name = "parameter_127" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_128: + name = "parameter_128" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_129: + name = "parameter_129" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_130: + name = "parameter_130" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_131: + name = "parameter_131" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_132: + name = "parameter_132" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_133: + name = "parameter_133" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_134: + name = "parameter_134" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_135: + name = "parameter_135" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_136: + name = "parameter_136" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_137: + name = "parameter_137" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_138: + name = "parameter_138" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_139: + name = "parameter_139" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_140: + name = "parameter_140" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_141: + name = "parameter_141" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_142: + name = "parameter_142" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_143: + name = "parameter_143" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_144: + name = "parameter_144" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_145: + name = "parameter_145" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_146: + name = "parameter_146" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_147: + name = "parameter_147" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_148: + name = "parameter_148" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_149: + name = "parameter_149" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_150: + name = "parameter_150" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_151: + name = "parameter_151" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_152: + name = "parameter_152" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_153: + name = "parameter_153" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_154: + name = "parameter_154" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_155: + name = "parameter_155" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_156: + name = "parameter_156" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_157: + name = "parameter_157" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_158: + name = "parameter_158" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_159: + name = "parameter_159" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_160: + name = "parameter_160" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_161: + name = "parameter_161" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_162: + name = "parameter_162" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_163: + name = "parameter_163" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_164: + name = "parameter_164" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_165: + name = "parameter_165" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_166: + name = "parameter_166" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_167: + name = "parameter_167" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_168: + name = "parameter_168" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_169: + name = "parameter_169" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_170: + name = "parameter_170" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_171: + name = "parameter_171" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_172: + name = "parameter_172" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_173: + name = "parameter_173" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_174: + name = "parameter_174" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_175: + name = "parameter_175" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_176: + name = "parameter_176" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_177: + name = "parameter_177" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_178: + name = "parameter_178" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_179: + name = "parameter_179" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_180: + name = "parameter_180" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_181: + name = "parameter_181" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_182: + name = "parameter_182" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_183: + name = "parameter_183" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_184: + name = "parameter_184" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_185: + name = "parameter_185" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_186: + name = "parameter_186" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_187: + name = "parameter_187" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_188: + name = "parameter_188" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_189: + name = "parameter_189" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_190: + name = "parameter_190" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_191: + name = "parameter_191" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_192: + name = "parameter_192" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_193: + name = "parameter_193" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_194: + name = "parameter_194" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_195: + name = "parameter_195" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_196: + name = "parameter_196" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_197: + name = "parameter_197" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_198: + name = "parameter_198" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_199: + name = "parameter_199" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_200: + name = "parameter_200" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_201: + name = "parameter_201" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_202: + name = "parameter_202" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_203: + name = "parameter_203" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_204: + name = "parameter_204" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_205: + name = "parameter_205" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_206: + name = "parameter_206" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_207: + name = "parameter_207" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_208: + name = "parameter_208" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_209: + name = "parameter_209" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_210: + name = "parameter_210" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_211: + name = "parameter_211" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_212: + name = "parameter_212" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_213: + name = "parameter_213" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_214: + name = "parameter_214" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_215: + name = "parameter_215" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_216: + name = "parameter_216" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_217: + name = "parameter_217" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_218: + name = "parameter_218" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_219: + name = "parameter_219" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_220: + name = "parameter_220" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_221: + name = "parameter_221" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_222: + name = "parameter_222" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_223: + name = "parameter_223" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_224: + name = "parameter_224" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_225: + name = "parameter_225" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_226: + name = "parameter_226" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_227: + name = "parameter_227" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_228: + name = "parameter_228" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_229: + name = "parameter_229" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_230: + name = "parameter_230" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_231: + name = "parameter_231" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_232: + name = "parameter_232" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_233: + name = "parameter_233" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_234: + name = "parameter_234" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_235: + name = "parameter_235" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_236: + name = "parameter_236" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_237: + name = "parameter_237" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_238: + name = "parameter_238" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_239: + name = "parameter_239" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_240: + name = "parameter_240" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_241: + name = "parameter_241" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_242: + name = "parameter_242" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_243: + name = "parameter_243" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_244: + name = "parameter_244" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_245: + name = "parameter_245" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_246: + name = "parameter_246" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_247: + name = "parameter_247" + shape = [1536, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_248: + name = "parameter_248" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_249: + name = "parameter_249" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_250: + name = "parameter_250" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_251: + name = "parameter_251" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_252: + name = "parameter_252" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_253: + name = "parameter_253" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_254: + name = "parameter_254" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_255: + name = "parameter_255" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_256: + name = "parameter_256" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_257: + name = "parameter_257" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_258: + name = "parameter_258" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_259: + name = "parameter_259" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_260: + name = "parameter_260" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_261: + name = "parameter_261" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_262: + name = "parameter_262" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_263: + name = "parameter_263" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_264: + name = "parameter_264" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_265: + name = "parameter_265" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_266: + name = "parameter_266" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_267: + name = "parameter_267" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_268: + name = "parameter_268" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_269: + name = "parameter_269" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_270: + name = "parameter_270" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_271: + name = "parameter_271" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_272: + name = "parameter_272" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_273: + name = "parameter_273" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_274: + name = "parameter_274" + shape = [768, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_275: + name = "parameter_275" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_276: + name = "parameter_276" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_277: + name = "parameter_277" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_278: + name = "parameter_278" + shape = [768, 192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_279: + name = "parameter_279" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_280: + name = "parameter_280" + shape = [192, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_281: + name = "parameter_281" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_282: + name = "parameter_282" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_283: + name = "parameter_283" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_284: + name = "parameter_284" + shape = [192, 192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_285: + name = "parameter_285" + shape = [576] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_286: + name = "parameter_286" + shape = [192, 576] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_287: + name = "parameter_287" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_288: + name = "parameter_288" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_289: + name = "parameter_289" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_290: + name = "parameter_290" + shape = [768, 192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_291: + name = "parameter_291" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_292: + name = "parameter_292" + shape = [192, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_293: + name = "parameter_293" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_294: + name = "parameter_294" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_295: + name = "parameter_295" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_296: + name = "parameter_296" + shape = [192, 192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_297: + name = "parameter_297" + shape = [576] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_298: + name = "parameter_298" + shape = [192, 576] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_299: + name = "parameter_299" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_300: + name = "parameter_300" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_301: + name = "parameter_301" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_302: + name = "parameter_302" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_303: + name = "parameter_303" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_304: + name = "parameter_304" + shape = [192, 3, 4, 4] + dtype = "float32" + low = 0 + high = 0.5 + data = None diff --git a/paddle_samples/vision-model/SwinTransformer_large_patch4_window7_224/subgraph_0/graph_hash.txt b/paddle_samples/vision-model/SwinTransformer_large_patch4_window7_224/subgraph_0/graph_hash.txt new file mode 100644 index 00000000..678a18a1 --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_large_patch4_window7_224/subgraph_0/graph_hash.txt @@ -0,0 +1 @@ +b30ca9883649d991bc8c4ba04da935a9b4b73a36613bcc8529ef9a0a2fb8b1d3 \ No newline at end of file diff --git a/paddle_samples/vision-model/SwinTransformer_large_patch4_window7_224/subgraph_0/graph_net.json b/paddle_samples/vision-model/SwinTransformer_large_patch4_window7_224/subgraph_0/graph_net.json new file mode 100644 index 00000000..368ac6fc --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_large_patch4_window7_224/subgraph_0/graph_net.json @@ -0,0 +1,5 @@ +{ + "framework": "paddle", + "num_devices_required": 1, + "num_nodes_required": 1 +} \ No newline at end of file diff --git a/paddle_samples/vision-model/SwinTransformer_large_patch4_window7_224/subgraph_0/input_meta.py b/paddle_samples/vision-model/SwinTransformer_large_patch4_window7_224/subgraph_0/input_meta.py new file mode 100644 index 00000000..9f98637d --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_large_patch4_window7_224/subgraph_0/input_meta.py @@ -0,0 +1,439 @@ +class Program_weight_tensor_data_0: + name = "data_0" + shape = [8, 3, 224, 224] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_1: + name = "data_1" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_2: + name = "data_2" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_3: + name = "data_3" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_4: + name = "data_4" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_5: + name = "data_5" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_6: + name = "data_6" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_7: + name = "data_7" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_8: + name = "data_8" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_9: + name = "data_9" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_10: + name = "data_10" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_11: + name = "data_11" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_12: + name = "data_12" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_13: + name = "data_13" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_14: + name = "data_14" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_15: + name = "data_15" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_16: + name = "data_16" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_17: + name = "data_17" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_18: + name = "data_18" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_19: + name = "data_19" + shape = [169, 48] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_20: + name = "data_20" + shape = [169, 12] + dtype = "float32" + low = -9.90303 + high = 6.31122 + data = None + + +class Program_weight_tensor_data_21: + name = "data_21" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_22: + name = "data_22" + shape = [169, 48] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_23: + name = "data_23" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_24: + name = "data_24" + shape = [169, 6] + dtype = "float32" + low = -10.3241 + high = 6.98573 + data = None + + +class Program_weight_tensor_data_25: + name = "data_25" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_26: + name = "data_26" + shape = [169, 12] + dtype = "float32" + low = -8.17582 + high = 5.92895 + data = None + + +class Program_weight_tensor_data_27: + name = "data_27" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_28: + name = "data_28" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_29: + name = "data_29" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_30: + name = "data_30" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_31: + name = "data_31" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_32: + name = "data_32" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_33: + name = "data_33" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_34: + name = "data_34" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_35: + name = "data_35" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_36: + name = "data_36" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_37: + name = "data_37" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_38: + name = "data_38" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_39: + name = "data_39" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_40: + name = "data_40" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_41: + name = "data_41" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_42: + name = "data_42" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_43: + name = "data_43" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_44: + name = "data_44" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_45: + name = "data_45" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_46: + name = "data_46" + shape = [169, 6] + dtype = "float32" + low = -8.58318 + high = 5.94374 + data = None + + +class Program_weight_tensor_data_47: + name = "data_47" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_48: + name = "data_48" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None diff --git a/paddle_samples/vision-model/SwinTransformer_large_patch4_window7_224/subgraph_0/model.py b/paddle_samples/vision-model/SwinTransformer_large_patch4_window7_224/subgraph_0/model.py new file mode 100644 index 00000000..ec8a06d5 --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_large_patch4_window7_224/subgraph_0/model.py @@ -0,0 +1,10708 @@ +import paddle + + +class GraphModule(paddle.nn.Layer): + def __init__(self): + super().__init__() + + def forward( + self, + parameter_0, + parameter_1, + parameter_2, + parameter_3, + parameter_4, + parameter_5, + parameter_6, + parameter_7, + parameter_8, + parameter_9, + parameter_10, + parameter_11, + parameter_12, + parameter_13, + parameter_14, + parameter_15, + parameter_16, + parameter_17, + parameter_18, + parameter_19, + parameter_20, + parameter_21, + parameter_22, + parameter_23, + parameter_24, + parameter_25, + parameter_26, + parameter_27, + parameter_28, + parameter_29, + parameter_30, + parameter_31, + parameter_32, + parameter_33, + parameter_34, + parameter_35, + parameter_36, + parameter_37, + parameter_38, + parameter_39, + parameter_40, + parameter_41, + parameter_42, + parameter_43, + parameter_44, + parameter_45, + parameter_46, + parameter_47, + parameter_48, + parameter_49, + parameter_50, + parameter_51, + parameter_52, + parameter_53, + parameter_54, + parameter_55, + parameter_56, + parameter_57, + parameter_58, + parameter_59, + parameter_60, + parameter_61, + parameter_62, + parameter_63, + parameter_64, + parameter_65, + parameter_66, + parameter_67, + parameter_68, + parameter_69, + parameter_70, + parameter_71, + parameter_72, + parameter_73, + parameter_74, + parameter_75, + parameter_76, + parameter_77, + parameter_78, + parameter_79, + parameter_80, + parameter_81, + parameter_82, + parameter_83, + parameter_84, + parameter_85, + parameter_86, + parameter_87, + parameter_88, + parameter_89, + parameter_90, + parameter_91, + parameter_92, + parameter_93, + parameter_94, + parameter_95, + parameter_96, + parameter_97, + parameter_98, + parameter_99, + parameter_100, + parameter_101, + parameter_102, + parameter_103, + parameter_104, + parameter_105, + parameter_106, + parameter_107, + parameter_108, + parameter_109, + parameter_110, + parameter_111, + parameter_112, + parameter_113, + parameter_114, + parameter_115, + parameter_116, + parameter_117, + parameter_118, + parameter_119, + parameter_120, + parameter_121, + parameter_122, + parameter_123, + parameter_124, + parameter_125, + parameter_126, + parameter_127, + parameter_128, + parameter_129, + parameter_130, + parameter_131, + parameter_132, + parameter_133, + parameter_134, + parameter_135, + parameter_136, + parameter_137, + parameter_138, + parameter_139, + parameter_140, + parameter_141, + parameter_142, + parameter_143, + parameter_144, + parameter_145, + parameter_146, + parameter_147, + parameter_148, + parameter_149, + parameter_150, + parameter_151, + parameter_152, + parameter_153, + parameter_154, + parameter_155, + parameter_156, + parameter_157, + parameter_158, + parameter_159, + parameter_160, + parameter_161, + parameter_162, + parameter_163, + parameter_164, + parameter_165, + parameter_166, + parameter_167, + parameter_168, + parameter_169, + parameter_170, + parameter_171, + parameter_172, + parameter_173, + parameter_174, + parameter_175, + parameter_176, + parameter_177, + parameter_178, + parameter_179, + parameter_180, + parameter_181, + parameter_182, + parameter_183, + parameter_184, + parameter_185, + parameter_186, + parameter_187, + parameter_188, + parameter_189, + parameter_190, + parameter_191, + parameter_192, + parameter_193, + parameter_194, + parameter_195, + parameter_196, + parameter_197, + parameter_198, + parameter_199, + parameter_200, + parameter_201, + parameter_202, + parameter_203, + parameter_204, + parameter_205, + parameter_206, + parameter_207, + parameter_208, + parameter_209, + parameter_210, + parameter_211, + parameter_212, + parameter_213, + parameter_214, + parameter_215, + parameter_216, + parameter_217, + parameter_218, + parameter_219, + parameter_220, + parameter_221, + parameter_222, + parameter_223, + parameter_224, + parameter_225, + parameter_226, + parameter_227, + parameter_228, + parameter_229, + parameter_230, + parameter_231, + parameter_232, + parameter_233, + parameter_234, + parameter_235, + parameter_236, + parameter_237, + parameter_238, + parameter_239, + parameter_240, + parameter_241, + parameter_242, + parameter_243, + parameter_244, + parameter_245, + parameter_246, + parameter_247, + parameter_248, + parameter_249, + parameter_250, + parameter_251, + parameter_252, + parameter_253, + parameter_254, + parameter_255, + parameter_256, + parameter_257, + parameter_258, + parameter_259, + parameter_260, + parameter_261, + parameter_262, + parameter_263, + parameter_264, + parameter_265, + parameter_266, + parameter_267, + parameter_268, + parameter_269, + parameter_270, + parameter_271, + parameter_272, + parameter_273, + parameter_274, + parameter_275, + parameter_276, + parameter_277, + parameter_278, + parameter_279, + parameter_280, + parameter_281, + parameter_282, + parameter_283, + parameter_284, + parameter_285, + parameter_286, + parameter_287, + parameter_288, + parameter_289, + parameter_290, + parameter_291, + parameter_292, + parameter_293, + parameter_294, + parameter_295, + parameter_296, + parameter_297, + parameter_298, + parameter_299, + parameter_300, + parameter_301, + parameter_302, + parameter_303, + parameter_304, + data_0, + data_1, + data_2, + data_3, + data_4, + data_5, + data_6, + data_7, + data_8, + data_9, + data_10, + data_11, + data_12, + data_13, + data_14, + data_15, + data_16, + data_17, + data_18, + data_19, + data_20, + data_21, + data_22, + data_23, + data_24, + data_25, + data_26, + data_27, + data_28, + data_29, + data_30, + data_31, + data_32, + data_33, + data_34, + data_35, + data_36, + data_37, + data_38, + data_39, + data_40, + data_41, + data_42, + data_43, + data_44, + data_45, + data_46, + data_47, + data_48, + ): + # pd_op.conv2d: (8x192x56x56xf32) <- (8x3x224x224xf32, 192x3x4x4xf32) + conv2d_0 = paddle._C_ops.conv2d( + data_0, parameter_304, [4, 4], [0, 0], "EXPLICIT", [1, 1], 1, "NCHW" + ) + del data_0, parameter_304 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_9 = [1, -1, 1, 1] + + # pd_op.reshape: (1x192x1x1xf32) <- (192xf32, 4xi64) + reshape_0 = paddle._C_ops.reshape(parameter_303, full_int_array_9) + del full_int_array_9, parameter_303 + + # pd_op.add: (8x192x56x56xf32) <- (8x192x56x56xf32, 1x192x1x1xf32) + add_0 = paddle._C_ops.add(conv2d_0, reshape_0) + + # pd_op.flatten: (8x192x3136xf32) <- (8x192x56x56xf32) + flatten_1 = paddle._C_ops.flatten(add_0, 2, 3) + + # pd_op.transpose: (8x3136x192xf32) <- (8x192x3136xf32) + transpose_0 = paddle._C_ops.transpose(flatten_1, [0, 2, 1]) + del flatten_1 + + # pd_op.layer_norm: (8x3136x192xf32, 8x3136xf32, 8x3136xf32) <- (8x3136x192xf32, 192xf32, 192xf32) + layer_norm_0, layer_norm_1, layer_norm_2 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + transpose_0, parameter_302, parameter_301, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_301, parameter_302 + + # pd_op.layer_norm: (8x3136x192xf32, 8x3136xf32, 8x3136xf32) <- (8x3136x192xf32, 192xf32, 192xf32) + layer_norm_3, layer_norm_4, layer_norm_5 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + layer_norm_0, parameter_300, parameter_299, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_299, parameter_300 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_10 = [8, 56, 56, 192] + + # pd_op.reshape: (8x56x56x192xf32) <- (8x3136x192xf32, 4xi64) + reshape_1 = paddle._C_ops.reshape(layer_norm_3, full_int_array_10) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_11 = [8, 8, 7, 8, 7, 192] + + # pd_op.reshape: (8x8x7x8x7x192xf32) <- (8x56x56x192xf32, 6xi64) + reshape_211 = paddle._C_ops.reshape(reshape_1, full_int_array_11) + + # pd_op.transpose: (8x8x8x7x7x192xf32) <- (8x8x7x8x7x192xf32) + transpose_1 = paddle._C_ops.transpose(reshape_211, [0, 1, 3, 2, 4, 5]) + del reshape_211 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_12 = [-1, 7, 7, 192] + + # pd_op.reshape: (512x7x7x192xf32) <- (8x8x8x7x7x192xf32, 4xi64) + reshape_2 = paddle._C_ops.reshape(transpose_1, full_int_array_12) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_13 = [-1, 49, 192] + + # pd_op.reshape: (512x49x192xf32) <- (512x7x7x192xf32, 3xi64) + reshape_3 = paddle._C_ops.reshape(reshape_2, full_int_array_13) + + # pd_op.matmul: (512x49x576xf32) <- (512x49x192xf32, 192x576xf32) + matmul_0 = paddle._C_ops.matmul(reshape_3, parameter_298, False, False) + del parameter_298 + + # pd_op.add: (512x49x576xf32) <- (512x49x576xf32, 576xf32) + add_1 = paddle._C_ops.add(matmul_0, parameter_297) + del parameter_297 + + # pd_op.full_int_array: (5xi64) <- () + full_int_array_14 = [512, 49, 3, 6, 32] + + # pd_op.reshape: (512x49x3x6x32xf32) <- (512x49x576xf32, 5xi64) + reshape_212 = paddle._C_ops.reshape(add_1, full_int_array_14) + + # pd_op.transpose: (3x512x6x49x32xf32) <- (512x49x3x6x32xf32) + transpose_2 = paddle._C_ops.transpose(reshape_212, [2, 0, 3, 1, 4]) + del reshape_212 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_0 = [0] + + # pd_op.assign: (1xi64) <- (1xi64) + assign_264 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_257 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_254 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_247 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_231 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_224 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_221 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_214 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_211 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_204 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_201 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_194 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_191 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_184 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_181 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_174 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_171 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_164 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_161 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_154 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_151 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_144 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_141 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_134 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_131 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_124 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_121 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_114 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_111 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_104 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_101 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_94 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_91 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_84 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_81 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_74 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_71 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_64 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_61 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_54 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_38 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_31 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_28 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_21 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_10 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_3 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_2 = full_int_array_0 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_1 = [1] + + # pd_op.assign: (1xi64) <- (1xi64) + assign_259 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_258 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_249 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_248 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_226 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_225 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_216 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_215 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_206 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_205 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_196 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_195 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_186 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_185 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_176 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_175 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_166 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_165 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_156 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_155 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_146 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_145 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_136 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_135 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_126 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_125 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_116 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_115 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_106 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_105 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_96 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_95 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_86 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_85 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_76 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_75 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_66 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_65 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_56 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_55 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_33 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_32 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_23 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_22 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_5 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_4 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_0 = full_int_array_1 + + # pd_op.slice: (512x6x49x32xf32) <- (3x512x6x49x32xf32, 1xi64, 1xi64) + slice_24 = paddle._C_ops.slice( + transpose_2, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_2 = [2] + + # pd_op.assign: (1xi64) <- (1xi64) + assign_269 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_267 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_261 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_260 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_251 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_250 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_228 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_227 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_218 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_217 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_208 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_207 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_198 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_197 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_188 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_187 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_178 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_177 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_168 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_167 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_158 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_157 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_148 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_147 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_138 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_137 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_128 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_127 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_118 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_117 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_108 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_107 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_98 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_97 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_88 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_87 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_78 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_77 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_68 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_67 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_58 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_57 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_35 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_34 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_25 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_24 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_7 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_6 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_1 = full_int_array_2 + + # pd_op.slice: (512x6x49x32xf32) <- (3x512x6x49x32xf32, 1xi64, 1xi64) + slice_25 = paddle._C_ops.slice( + transpose_2, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_3 = [3] + + # pd_op.assign: (1xi64) <- (1xi64) + assign_262 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_252 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_229 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_219 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_209 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_199 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_189 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_179 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_169 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_159 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_149 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_139 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_129 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_119 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_109 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_99 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_89 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_79 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_69 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_59 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_36 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_26 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_8 = full_int_array_3 + + # pd_op.slice: (512x6x49x32xf32) <- (3x512x6x49x32xf32, 1xi64, 1xi64) + slice_0 = paddle._C_ops.slice( + transpose_2, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.full: (1xf32) <- () + full_0 = paddle._C_ops.full( + [1], float("0.176777"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.assign: (1xf32) <- (1xf32) + assign_263 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_253 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_230 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_220 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_210 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_200 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_190 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_180 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_170 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_160 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_150 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_140 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_130 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_120 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_110 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_100 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_90 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_80 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_70 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_60 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_37 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_27 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_9 = full_0 + + # pd_op.scale: (512x6x49x32xf32) <- (512x6x49x32xf32, 1xf32) + scale_0 = paddle._C_ops.scale(slice_24, full_0, float("0"), True) + del slice_24 + + # pd_op.transpose: (512x6x32x49xf32) <- (512x6x49x32xf32) + transpose_3 = paddle._C_ops.transpose(slice_25, [0, 1, 3, 2]) + del slice_25 + + # pd_op.matmul: (512x6x49x49xf32) <- (512x6x49x32xf32, 512x6x32x49xf32) + matmul_1 = paddle._C_ops.matmul(scale_0, transpose_3, False, False) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_15 = [-1] + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_4 = paddle._C_ops.reshape(data_23, full_int_array_15) + del data_23 + + # pd_op.index_select: (2401x6xf32) <- (169x6xf32, 2401xi64) + index_select_0 = paddle._C_ops.index_select(data_24, reshape_4, 0) + del data_24 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_16 = [49, 49, -1] + + # pd_op.reshape: (49x49x6xf32) <- (2401x6xf32, 3xi64) + reshape_213 = paddle._C_ops.reshape(index_select_0, full_int_array_16) + + # pd_op.transpose: (6x49x49xf32) <- (49x49x6xf32) + transpose_4 = paddle._C_ops.transpose(reshape_213, [2, 0, 1]) + del reshape_213 + + # pd_op.unsqueeze: (1x6x49x49xf32) <- (6x49x49xf32, 1xi64) + unsqueeze_0 = paddle._C_ops.unsqueeze(transpose_4, full_int_array_0) + + # pd_op.add: (512x6x49x49xf32) <- (512x6x49x49xf32, 1x6x49x49xf32) + add_170 = paddle._C_ops.add(matmul_1, unsqueeze_0) + + # pd_op.softmax: (512x6x49x49xf32) <- (512x6x49x49xf32) + softmax_0 = paddle._C_ops.softmax(add_170, -1) + del add_170 + + # pd_op.matmul: (512x6x49x32xf32) <- (512x6x49x49xf32, 512x6x49x32xf32) + matmul_124 = paddle._C_ops.matmul(softmax_0, slice_0, False, False) + + # pd_op.transpose: (512x49x6x32xf32) <- (512x6x49x32xf32) + transpose_5 = paddle._C_ops.transpose(matmul_124, [0, 2, 1, 3]) + del matmul_124 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_17 = [512, 49, 192] + + # pd_op.reshape: (512x49x192xf32) <- (512x49x6x32xf32, 3xi64) + reshape_5 = paddle._C_ops.reshape(transpose_5, full_int_array_17) + + # pd_op.matmul: (512x49x192xf32) <- (512x49x192xf32, 192x192xf32) + matmul_2 = paddle._C_ops.matmul(reshape_5, parameter_296, False, False) + del parameter_296 + + # pd_op.add: (512x49x192xf32) <- (512x49x192xf32, 192xf32) + add_2 = paddle._C_ops.add(matmul_2, parameter_295) + del parameter_295 + + # pd_op.reshape: (512x7x7x192xf32) <- (512x49x192xf32, 4xi64) + reshape_6 = paddle._C_ops.reshape(add_2, full_int_array_12) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_18 = [-1, 8, 8, 7, 7, 192] + + # pd_op.reshape: (8x8x8x7x7x192xf32) <- (512x7x7x192xf32, 6xi64) + reshape_214 = paddle._C_ops.reshape(reshape_6, full_int_array_18) + + # pd_op.transpose: (8x8x7x8x7x192xf32) <- (8x8x8x7x7x192xf32) + transpose_6 = paddle._C_ops.transpose(reshape_214, [0, 1, 3, 2, 4, 5]) + del reshape_214 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_19 = [-1, 56, 56, 192] + + # pd_op.reshape: (8x56x56x192xf32) <- (8x8x7x8x7x192xf32, 4xi64) + reshape_7 = paddle._C_ops.reshape(transpose_6, full_int_array_19) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_20 = [8, 3136, 192] + + # pd_op.reshape: (8x3136x192xf32) <- (8x56x56x192xf32, 3xi64) + reshape_8 = paddle._C_ops.reshape(reshape_7, full_int_array_20) + + # pd_op.add: (8x3136x192xf32) <- (8x3136x192xf32, 8x3136x192xf32) + add_3 = paddle._C_ops.add(layer_norm_0, reshape_8) + + # pd_op.layer_norm: (8x3136x192xf32, 8x3136xf32, 8x3136xf32) <- (8x3136x192xf32, 192xf32, 192xf32) + layer_norm_6, layer_norm_7, layer_norm_8 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_3, parameter_294, parameter_293, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_293, parameter_294 + + # pd_op.matmul: (8x3136x768xf32) <- (8x3136x192xf32, 192x768xf32) + matmul_3 = paddle._C_ops.matmul(layer_norm_6, parameter_292, False, False) + del parameter_292 + + # pd_op.add: (8x3136x768xf32) <- (8x3136x768xf32, 768xf32) + add_4 = paddle._C_ops.add(matmul_3, parameter_291) + del parameter_291 + + # pd_op.gelu: (8x3136x768xf32) <- (8x3136x768xf32) + gelu_0 = paddle._C_ops.gelu(add_4, False) + + # pd_op.matmul: (8x3136x192xf32) <- (8x3136x768xf32, 768x192xf32) + matmul_4 = paddle._C_ops.matmul(gelu_0, parameter_290, False, False) + del parameter_290 + + # pd_op.add: (8x3136x192xf32) <- (8x3136x192xf32, 192xf32) + add_5 = paddle._C_ops.add(matmul_4, parameter_289) + del parameter_289 + + # pd_op.add: (8x3136x192xf32) <- (8x3136x192xf32, 8x3136x192xf32) + add_6 = paddle._C_ops.add(add_3, add_5) + + # pd_op.layer_norm: (8x3136x192xf32, 8x3136xf32, 8x3136xf32) <- (8x3136x192xf32, 192xf32, 192xf32) + layer_norm_9, layer_norm_10, layer_norm_11 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_6, parameter_288, parameter_287, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_287, parameter_288 + + # pd_op.reshape: (8x56x56x192xf32) <- (8x3136x192xf32, 4xi64) + reshape_9 = paddle._C_ops.reshape(layer_norm_9, full_int_array_10) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_4 = [-3, -3] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_256 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_223 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_203 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_183 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_163 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_143 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_123 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_103 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_83 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_63 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_30 = full_int_array_4 + + # pd_op.roll: (8x56x56x192xf32) <- (8x56x56x192xf32, 2xi64) + roll_0 = paddle._C_ops.roll(reshape_9, full_int_array_4, [1, 2]) + + # pd_op.reshape: (8x8x7x8x7x192xf32) <- (8x56x56x192xf32, 6xi64) + reshape_215 = paddle._C_ops.reshape(roll_0, full_int_array_11) + del full_int_array_11 + + # pd_op.transpose: (8x8x8x7x7x192xf32) <- (8x8x7x8x7x192xf32) + transpose_7 = paddle._C_ops.transpose(reshape_215, [0, 1, 3, 2, 4, 5]) + del reshape_215 + + # pd_op.reshape: (512x7x7x192xf32) <- (8x8x8x7x7x192xf32, 4xi64) + reshape_10 = paddle._C_ops.reshape(transpose_7, full_int_array_12) + + # pd_op.reshape: (512x49x192xf32) <- (512x7x7x192xf32, 3xi64) + reshape_11 = paddle._C_ops.reshape(reshape_10, full_int_array_13) + del full_int_array_13 + + # pd_op.full: (1x56x56x1xf32) <- () + full_25 = paddle._C_ops.full( + [1, 56, 56, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_21 = [0, 0] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_234 = full_int_array_21 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_41 = full_int_array_21 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_12 = full_int_array_21 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_22 = [-7, -7] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_23 = [1, 1] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_268 = full_int_array_23 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_243 = full_int_array_23 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_50 = full_int_array_23 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_18 = full_int_array_23 + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__12 = paddle._C_ops.set_value_( + full_25, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_25 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_24 = [0, -7] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_25 = [-7, -3] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__13 = paddle._C_ops.set_value_( + set_value__12, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__12 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_26 = [0, -3] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_27 = [-7, 2147483647] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__14 = paddle._C_ops.set_value_( + set_value__13, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__13 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_28 = [-7, 0] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_29 = [-3, -7] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__15 = paddle._C_ops.set_value_( + set_value__14, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__14 + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__16 = paddle._C_ops.set_value_( + set_value__15, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__15 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_30 = [-3, 2147483647] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__17 = paddle._C_ops.set_value_( + set_value__16, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__16 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_31 = [-3, 0] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_32 = [2147483647, -7] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__18 = paddle._C_ops.set_value_( + set_value__17, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__17 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_33 = [2147483647, -3] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__19 = paddle._C_ops.set_value_( + set_value__18, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__18 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_34 = [2147483647, 2147483647] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_244 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_241 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_238 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_235 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_51 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_48 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_45 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_42 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_19 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_16 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_14 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_13 = full_int_array_34 + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__0 = paddle._C_ops.set_value_( + set_value__19, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__19 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_35 = [1, 8, 7, 8, 7, 1] + + # pd_op.reshape: (1x8x7x8x7x1xf32) <- (1x56x56x1xf32, 6xi64) + reshape_216 = paddle._C_ops.reshape(set_value__0, full_int_array_35) + del full_int_array_35 + + # pd_op.transpose: (1x8x8x7x7x1xf32) <- (1x8x7x8x7x1xf32) + transpose_146 = paddle._C_ops.transpose(reshape_216, [0, 1, 3, 2, 4, 5]) + del reshape_216 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_36 = [-1, 7, 7, 1] + + # pd_op.reshape: (64x7x7x1xf32) <- (1x8x8x7x7x1xf32, 4xi64) + reshape_217 = paddle._C_ops.reshape(transpose_146, full_int_array_36) + del transpose_146 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_37 = [-1, 49] + + # pd_op.reshape: (64x49xf32) <- (64x7x7x1xf32, 2xi64) + reshape_218 = paddle._C_ops.reshape(reshape_217, full_int_array_37) + del reshape_217 + + # pd_op.unsqueeze: (64x1x49xf32) <- (64x49xf32, 1xi64) + unsqueeze_37 = paddle._C_ops.unsqueeze(reshape_218, full_int_array_1) + + # pd_op.unsqueeze: (64x49x1xf32) <- (64x49xf32, 1xi64) + unsqueeze_38 = paddle._C_ops.unsqueeze(reshape_218, full_int_array_2) + del reshape_218 + + # pd_op.subtract: (64x49x49xf32) <- (64x1x49xf32, 64x49x1xf32) + subtract_0 = paddle._C_ops.subtract(unsqueeze_37, unsqueeze_38) + del unsqueeze_37, unsqueeze_38 + + # pd_op.full: (xf32) <- () + full_26 = paddle._C_ops.full( + [], float("0"), paddle.float32, paddle.framework._current_expected_place() + ) + + # pd_op.not_equal: (64x49x49xb) <- (64x49x49xf32, xf32) + not_equal_0 = paddle._C_ops.not_equal(subtract_0, full_26) + + # pd_op.full: (64x49x49xf32) <- () + full_27 = paddle._C_ops.full( + [64, 49, 49], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (64x49x49xf32) <- (64x49x49xb, 64x49x49xf32, 64x49x49xf32) + where_0 = paddle._C_ops.where(not_equal_0, full_27, subtract_0) + del full_27, not_equal_0, subtract_0 + + # pd_op.equal: (64x49x49xb) <- (64x49x49xf32, xf32) + equal_0 = paddle._C_ops.equal(where_0, full_26) + + # pd_op.full: (64x49x49xf32) <- () + full_28 = paddle._C_ops.full( + [64, 49, 49], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (64x49x49xf32) <- (64x49x49xb, 64x49x49xf32, 64x49x49xf32) + where_1 = paddle._C_ops.where(equal_0, full_28, where_0) + del equal_0, full_28, where_0 + + # pd_op.matmul: (512x49x576xf32) <- (512x49x192xf32, 192x576xf32) + matmul_5 = paddle._C_ops.matmul(reshape_11, parameter_286, False, False) + del parameter_286 + + # pd_op.add: (512x49x576xf32) <- (512x49x576xf32, 576xf32) + add_7 = paddle._C_ops.add(matmul_5, parameter_285) + del parameter_285 + + # pd_op.reshape: (512x49x3x6x32xf32) <- (512x49x576xf32, 5xi64) + reshape_219 = paddle._C_ops.reshape(add_7, full_int_array_14) + del full_int_array_14 + + # pd_op.transpose: (3x512x6x49x32xf32) <- (512x49x3x6x32xf32) + transpose_8 = paddle._C_ops.transpose(reshape_219, [2, 0, 3, 1, 4]) + del reshape_219 + + # pd_op.slice: (512x6x49x32xf32) <- (3x512x6x49x32xf32, 1xi64, 1xi64) + slice_26 = paddle._C_ops.slice( + transpose_8, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (512x6x49x32xf32) <- (3x512x6x49x32xf32, 1xi64, 1xi64) + slice_27 = paddle._C_ops.slice( + transpose_8, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (512x6x49x32xf32) <- (3x512x6x49x32xf32, 1xi64, 1xi64) + slice_1 = paddle._C_ops.slice( + transpose_8, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (512x6x49x32xf32) <- (512x6x49x32xf32, 1xf32) + scale_1 = paddle._C_ops.scale(slice_26, full_0, float("0"), True) + del slice_26 + + # pd_op.transpose: (512x6x32x49xf32) <- (512x6x49x32xf32) + transpose_9 = paddle._C_ops.transpose(slice_27, [0, 1, 3, 2]) + del slice_27 + + # pd_op.matmul: (512x6x49x49xf32) <- (512x6x49x32xf32, 512x6x32x49xf32) + matmul_6 = paddle._C_ops.matmul(scale_1, transpose_9, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_12 = paddle._C_ops.reshape(data_43, full_int_array_15) + del data_43 + + # pd_op.index_select: (2401x6xf32) <- (169x6xf32, 2401xi64) + index_select_1 = paddle._C_ops.index_select(data_46, reshape_12, 0) + del data_46 + + # pd_op.reshape: (49x49x6xf32) <- (2401x6xf32, 3xi64) + reshape_220 = paddle._C_ops.reshape(index_select_1, full_int_array_16) + + # pd_op.transpose: (6x49x49xf32) <- (49x49x6xf32) + transpose_10 = paddle._C_ops.transpose(reshape_220, [2, 0, 1]) + del reshape_220 + + # pd_op.unsqueeze: (1x6x49x49xf32) <- (6x49x49xf32, 1xi64) + unsqueeze_1 = paddle._C_ops.unsqueeze(transpose_10, full_int_array_0) + + # pd_op.add: (512x6x49x49xf32) <- (512x6x49x49xf32, 1x6x49x49xf32) + add_8 = paddle._C_ops.add(matmul_6, unsqueeze_1) + + # pd_op.full_int_array: (5xi64) <- () + full_int_array_38 = [8, 64, 6, 49, 49] + + # pd_op.reshape: (8x64x6x49x49xf32) <- (512x6x49x49xf32, 5xi64) + reshape_13 = paddle._C_ops.reshape(add_8, full_int_array_38) + del full_int_array_38 + + # pd_op.unsqueeze: (64x1x49x49xf32) <- (64x49x49xf32, 1xi64) + unsqueeze_39 = paddle._C_ops.unsqueeze(where_1, full_int_array_1) + del where_1 + + # pd_op.unsqueeze: (1x64x1x49x49xf32) <- (64x1x49x49xf32, 1xi64) + unsqueeze_2 = paddle._C_ops.unsqueeze(unsqueeze_39, full_int_array_0) + del unsqueeze_39 + + # pd_op.add: (8x64x6x49x49xf32) <- (8x64x6x49x49xf32, 1x64x1x49x49xf32) + add_9 = paddle._C_ops.add(reshape_13, unsqueeze_2) + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_39 = [512, 6, 49, 49] + + # pd_op.reshape: (512x6x49x49xf32) <- (8x64x6x49x49xf32, 4xi64) + reshape_221 = paddle._C_ops.reshape(add_9, full_int_array_39) + del full_int_array_39 + + # pd_op.softmax: (512x6x49x49xf32) <- (512x6x49x49xf32) + softmax_1 = paddle._C_ops.softmax(reshape_221, -1) + del reshape_221 + + # pd_op.matmul: (512x6x49x32xf32) <- (512x6x49x49xf32, 512x6x49x32xf32) + matmul_125 = paddle._C_ops.matmul(softmax_1, slice_1, False, False) + + # pd_op.transpose: (512x49x6x32xf32) <- (512x6x49x32xf32) + transpose_11 = paddle._C_ops.transpose(matmul_125, [0, 2, 1, 3]) + del matmul_125 + + # pd_op.reshape: (512x49x192xf32) <- (512x49x6x32xf32, 3xi64) + reshape_14 = paddle._C_ops.reshape(transpose_11, full_int_array_17) + del full_int_array_17 + + # pd_op.matmul: (512x49x192xf32) <- (512x49x192xf32, 192x192xf32) + matmul_7 = paddle._C_ops.matmul(reshape_14, parameter_284, False, False) + del parameter_284 + + # pd_op.add: (512x49x192xf32) <- (512x49x192xf32, 192xf32) + add_10 = paddle._C_ops.add(matmul_7, parameter_283) + del parameter_283 + + # pd_op.reshape: (512x7x7x192xf32) <- (512x49x192xf32, 4xi64) + reshape_15 = paddle._C_ops.reshape(add_10, full_int_array_12) + del full_int_array_12 + + # pd_op.reshape: (8x8x8x7x7x192xf32) <- (512x7x7x192xf32, 6xi64) + reshape_222 = paddle._C_ops.reshape(reshape_15, full_int_array_18) + del full_int_array_18 + + # pd_op.transpose: (8x8x7x8x7x192xf32) <- (8x8x8x7x7x192xf32) + transpose_12 = paddle._C_ops.transpose(reshape_222, [0, 1, 3, 2, 4, 5]) + del reshape_222 + + # pd_op.reshape: (8x56x56x192xf32) <- (8x8x7x8x7x192xf32, 4xi64) + reshape_16 = paddle._C_ops.reshape(transpose_12, full_int_array_19) + del full_int_array_19 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_5 = [3, 3] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_265 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_232 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_212 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_192 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_172 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_152 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_132 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_112 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_92 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_72 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_39 = full_int_array_5 + + # pd_op.roll: (8x56x56x192xf32) <- (8x56x56x192xf32, 2xi64) + roll_1 = paddle._C_ops.roll(reshape_16, full_int_array_5, [1, 2]) + + # pd_op.reshape: (8x3136x192xf32) <- (8x56x56x192xf32, 3xi64) + reshape_17 = paddle._C_ops.reshape(roll_1, full_int_array_20) + del full_int_array_20 + + # pd_op.full: (xf32) <- () + full_1 = paddle._C_ops.full( + [], + float("0.995652"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_11 = full_1 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_40 = [8, 1, 1] + + # pd_op.full: (1xf32) <- () + full_29 = paddle._C_ops.full( + [1], float("0"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.full: (1xf32) <- () + full_30 = paddle._C_ops.full( + [1], float("1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.uniform: (8x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_0 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (8x1x1xf32) <- (xf32, 8x1x1xf32) + add_171 = paddle._C_ops.add(full_1, uniform_0) + del uniform_0 + + # pd_op.floor: (8x1x1xf32) <- (8x1x1xf32) + floor_0 = paddle._C_ops.floor(add_171) + del add_171 + + # pd_op.divide: (8x3136x192xf32) <- (8x3136x192xf32, xf32) + divide_0 = paddle._C_ops.divide(reshape_17, full_1) + + # pd_op.multiply: (8x3136x192xf32) <- (8x3136x192xf32, 8x1x1xf32) + multiply_0 = paddle._C_ops.multiply(divide_0, floor_0) + + # pd_op.add: (8x3136x192xf32) <- (8x3136x192xf32, 8x3136x192xf32) + add_11 = paddle._C_ops.add(add_6, multiply_0) + + # pd_op.layer_norm: (8x3136x192xf32, 8x3136xf32, 8x3136xf32) <- (8x3136x192xf32, 192xf32, 192xf32) + layer_norm_12, layer_norm_13, layer_norm_14 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_11, parameter_282, parameter_281, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_281, parameter_282 + + # pd_op.matmul: (8x3136x768xf32) <- (8x3136x192xf32, 192x768xf32) + matmul_8 = paddle._C_ops.matmul(layer_norm_12, parameter_280, False, False) + del parameter_280 + + # pd_op.add: (8x3136x768xf32) <- (8x3136x768xf32, 768xf32) + add_12 = paddle._C_ops.add(matmul_8, parameter_279) + del parameter_279 + + # pd_op.gelu: (8x3136x768xf32) <- (8x3136x768xf32) + gelu_1 = paddle._C_ops.gelu(add_12, False) + + # pd_op.matmul: (8x3136x192xf32) <- (8x3136x768xf32, 768x192xf32) + matmul_9 = paddle._C_ops.matmul(gelu_1, parameter_278, False, False) + del parameter_278 + + # pd_op.add: (8x3136x192xf32) <- (8x3136x192xf32, 192xf32) + add_13 = paddle._C_ops.add(matmul_9, parameter_277) + del parameter_277 + + # pd_op.uniform: (8x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_1 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (8x1x1xf32) <- (xf32, 8x1x1xf32) + add_172 = paddle._C_ops.add(full_1, uniform_1) + del uniform_1 + + # pd_op.floor: (8x1x1xf32) <- (8x1x1xf32) + floor_1 = paddle._C_ops.floor(add_172) + del add_172 + + # pd_op.divide: (8x3136x192xf32) <- (8x3136x192xf32, xf32) + divide_1 = paddle._C_ops.divide(add_13, full_1) + + # pd_op.multiply: (8x3136x192xf32) <- (8x3136x192xf32, 8x1x1xf32) + multiply_1 = paddle._C_ops.multiply(divide_1, floor_1) + + # pd_op.add: (8x3136x192xf32) <- (8x3136x192xf32, 8x3136x192xf32) + add_14 = paddle._C_ops.add(add_11, multiply_1) + + # pd_op.reshape: (8x56x56x192xf32) <- (8x3136x192xf32, 4xi64) + reshape_18 = paddle._C_ops.reshape(add_14, full_int_array_10) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_6 = [2, 2] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_245 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_242 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_239 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_236 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_52 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_49 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_46 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_43 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_20 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_17 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_15 = full_int_array_6 + + # pd_op.strided_slice: (8x28x28x192xf32) <- (8x56x56x192xf32, 2xi64, 2xi64, 2xi64) + strided_slice_0 = paddle._C_ops.strided_slice( + reshape_18, [1, 2], full_int_array_21, full_int_array_34, full_int_array_6 + ) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_7 = [1, 0] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_237 = full_int_array_7 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_44 = full_int_array_7 + + # pd_op.strided_slice: (8x28x28x192xf32) <- (8x56x56x192xf32, 2xi64, 2xi64, 2xi64) + strided_slice_1 = paddle._C_ops.strided_slice( + reshape_18, [1, 2], full_int_array_7, full_int_array_34, full_int_array_6 + ) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_8 = [0, 1] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_240 = full_int_array_8 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_47 = full_int_array_8 + + # pd_op.strided_slice: (8x28x28x192xf32) <- (8x56x56x192xf32, 2xi64, 2xi64, 2xi64) + strided_slice_2 = paddle._C_ops.strided_slice( + reshape_18, [1, 2], full_int_array_8, full_int_array_34, full_int_array_6 + ) + + # pd_op.strided_slice: (8x28x28x192xf32) <- (8x56x56x192xf32, 2xi64, 2xi64, 2xi64) + strided_slice_3 = paddle._C_ops.strided_slice( + reshape_18, [1, 2], full_int_array_23, full_int_array_34, full_int_array_6 + ) + + # pd_op.reshape: (8x56x56x192xf32) <- (8x56x56x192xf32, 4xi64) + reshape_223 = paddle._C_ops.reshape(reshape_18, full_int_array_10) + del full_int_array_10 + + # pd_op.full: (1xi32) <- () + full_2 = paddle._C_ops.full( + [1], float("-1"), paddle.int32, paddle.core.CPUPlace() + ) + + # pd_op.assign: (1xi32) <- (1xi32) + assign_246 = full_2 + + # pd_op.assign: (1xi32) <- (1xi32) + assign_53 = full_2 + + # builtin.combine: ([8x28x28x192xf32, 8x28x28x192xf32, 8x28x28x192xf32, 8x28x28x192xf32]) <- (8x28x28x192xf32, 8x28x28x192xf32, 8x28x28x192xf32, 8x28x28x192xf32) + combine_0 = [strided_slice_0, strided_slice_1, strided_slice_2, strided_slice_3] + + # pd_op.concat: (8x28x28x768xf32) <- ([8x28x28x192xf32, 8x28x28x192xf32, 8x28x28x192xf32, 8x28x28x192xf32], 1xi32) + concat_0 = paddle._C_ops.concat(combine_0, full_2) + del combine_0 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_41 = [8, -1, 768] + + # pd_op.reshape: (8x784x768xf32) <- (8x28x28x768xf32, 3xi64) + reshape_19 = paddle._C_ops.reshape(concat_0, full_int_array_41) + del full_int_array_41 + + # pd_op.layer_norm: (8x784x768xf32, 8x784xf32, 8x784xf32) <- (8x784x768xf32, 768xf32, 768xf32) + layer_norm_15, layer_norm_16, layer_norm_17 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + reshape_19, parameter_276, parameter_275, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_275, parameter_276 + + # pd_op.matmul: (8x784x384xf32) <- (8x784x768xf32, 768x384xf32) + matmul_10 = paddle._C_ops.matmul(layer_norm_15, parameter_274, False, False) + del parameter_274 + + # pd_op.layer_norm: (8x784x384xf32, 8x784xf32, 8x784xf32) <- (8x784x384xf32, 384xf32, 384xf32) + layer_norm_18, layer_norm_19, layer_norm_20 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + matmul_10, parameter_273, parameter_272, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_272, parameter_273 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_42 = [8, 28, 28, 384] + + # pd_op.reshape: (8x28x28x384xf32) <- (8x784x384xf32, 4xi64) + reshape_20 = paddle._C_ops.reshape(layer_norm_18, full_int_array_42) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_43 = [8, 4, 7, 4, 7, 384] + + # pd_op.reshape: (8x4x7x4x7x384xf32) <- (8x28x28x384xf32, 6xi64) + reshape_224 = paddle._C_ops.reshape(reshape_20, full_int_array_43) + + # pd_op.transpose: (8x4x4x7x7x384xf32) <- (8x4x7x4x7x384xf32) + transpose_13 = paddle._C_ops.transpose(reshape_224, [0, 1, 3, 2, 4, 5]) + del reshape_224 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_44 = [-1, 7, 7, 384] + + # pd_op.reshape: (128x7x7x384xf32) <- (8x4x4x7x7x384xf32, 4xi64) + reshape_21 = paddle._C_ops.reshape(transpose_13, full_int_array_44) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_45 = [-1, 49, 384] + + # pd_op.reshape: (128x49x384xf32) <- (128x7x7x384xf32, 3xi64) + reshape_22 = paddle._C_ops.reshape(reshape_21, full_int_array_45) + + # pd_op.matmul: (128x49x1152xf32) <- (128x49x384xf32, 384x1152xf32) + matmul_11 = paddle._C_ops.matmul(reshape_22, parameter_271, False, False) + del parameter_271 + + # pd_op.add: (128x49x1152xf32) <- (128x49x1152xf32, 1152xf32) + add_15 = paddle._C_ops.add(matmul_11, parameter_270) + del parameter_270 + + # pd_op.full_int_array: (5xi64) <- () + full_int_array_46 = [128, 49, 3, 12, 32] + + # pd_op.reshape: (128x49x3x12x32xf32) <- (128x49x1152xf32, 5xi64) + reshape_225 = paddle._C_ops.reshape(add_15, full_int_array_46) + + # pd_op.transpose: (3x128x12x49x32xf32) <- (128x49x3x12x32xf32) + transpose_14 = paddle._C_ops.transpose(reshape_225, [2, 0, 3, 1, 4]) + del reshape_225 + + # pd_op.slice: (128x12x49x32xf32) <- (3x128x12x49x32xf32, 1xi64, 1xi64) + slice_28 = paddle._C_ops.slice( + transpose_14, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (128x12x49x32xf32) <- (3x128x12x49x32xf32, 1xi64, 1xi64) + slice_29 = paddle._C_ops.slice( + transpose_14, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (128x12x49x32xf32) <- (3x128x12x49x32xf32, 1xi64, 1xi64) + slice_2 = paddle._C_ops.slice( + transpose_14, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (128x12x49x32xf32) <- (128x12x49x32xf32, 1xf32) + scale_2 = paddle._C_ops.scale(slice_28, full_0, float("0"), True) + del slice_28 + + # pd_op.transpose: (128x12x32x49xf32) <- (128x12x49x32xf32) + transpose_15 = paddle._C_ops.transpose(slice_29, [0, 1, 3, 2]) + del slice_29 + + # pd_op.matmul: (128x12x49x49xf32) <- (128x12x49x32xf32, 128x12x32x49xf32) + matmul_12 = paddle._C_ops.matmul(scale_2, transpose_15, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_23 = paddle._C_ops.reshape(data_17, full_int_array_15) + del data_17 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_2 = paddle._C_ops.index_select(data_20, reshape_23, 0) + del data_20 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_226 = paddle._C_ops.reshape(index_select_2, full_int_array_16) + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_16 = paddle._C_ops.transpose(reshape_226, [2, 0, 1]) + del reshape_226 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_3 = paddle._C_ops.unsqueeze(transpose_16, full_int_array_0) + + # pd_op.add: (128x12x49x49xf32) <- (128x12x49x49xf32, 1x12x49x49xf32) + add_173 = paddle._C_ops.add(matmul_12, unsqueeze_3) + + # pd_op.softmax: (128x12x49x49xf32) <- (128x12x49x49xf32) + softmax_2 = paddle._C_ops.softmax(add_173, -1) + del add_173 + + # pd_op.matmul: (128x12x49x32xf32) <- (128x12x49x49xf32, 128x12x49x32xf32) + matmul_126 = paddle._C_ops.matmul(softmax_2, slice_2, False, False) + + # pd_op.transpose: (128x49x12x32xf32) <- (128x12x49x32xf32) + transpose_17 = paddle._C_ops.transpose(matmul_126, [0, 2, 1, 3]) + del matmul_126 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_47 = [128, 49, 384] + + # pd_op.reshape: (128x49x384xf32) <- (128x49x12x32xf32, 3xi64) + reshape_24 = paddle._C_ops.reshape(transpose_17, full_int_array_47) + + # pd_op.matmul: (128x49x384xf32) <- (128x49x384xf32, 384x384xf32) + matmul_13 = paddle._C_ops.matmul(reshape_24, parameter_269, False, False) + del parameter_269 + + # pd_op.add: (128x49x384xf32) <- (128x49x384xf32, 384xf32) + add_16 = paddle._C_ops.add(matmul_13, parameter_268) + del parameter_268 + + # pd_op.reshape: (128x7x7x384xf32) <- (128x49x384xf32, 4xi64) + reshape_25 = paddle._C_ops.reshape(add_16, full_int_array_44) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_48 = [-1, 4, 4, 7, 7, 384] + + # pd_op.reshape: (8x4x4x7x7x384xf32) <- (128x7x7x384xf32, 6xi64) + reshape_227 = paddle._C_ops.reshape(reshape_25, full_int_array_48) + + # pd_op.transpose: (8x4x7x4x7x384xf32) <- (8x4x4x7x7x384xf32) + transpose_18 = paddle._C_ops.transpose(reshape_227, [0, 1, 3, 2, 4, 5]) + del reshape_227 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_49 = [-1, 28, 28, 384] + + # pd_op.reshape: (8x28x28x384xf32) <- (8x4x7x4x7x384xf32, 4xi64) + reshape_26 = paddle._C_ops.reshape(transpose_18, full_int_array_49) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_50 = [8, 784, 384] + + # pd_op.reshape: (8x784x384xf32) <- (8x28x28x384xf32, 3xi64) + reshape_27 = paddle._C_ops.reshape(reshape_26, full_int_array_50) + + # pd_op.full: (xf32) <- () + full_3 = paddle._C_ops.full( + [], + float("0.991304"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_29 = full_3 + + # pd_op.uniform: (8x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_2 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (8x1x1xf32) <- (xf32, 8x1x1xf32) + add_174 = paddle._C_ops.add(full_3, uniform_2) + del uniform_2 + + # pd_op.floor: (8x1x1xf32) <- (8x1x1xf32) + floor_2 = paddle._C_ops.floor(add_174) + del add_174 + + # pd_op.divide: (8x784x384xf32) <- (8x784x384xf32, xf32) + divide_2 = paddle._C_ops.divide(reshape_27, full_3) + + # pd_op.multiply: (8x784x384xf32) <- (8x784x384xf32, 8x1x1xf32) + multiply_2 = paddle._C_ops.multiply(divide_2, floor_2) + + # pd_op.add: (8x784x384xf32) <- (8x784x384xf32, 8x784x384xf32) + add_17 = paddle._C_ops.add(matmul_10, multiply_2) + + # pd_op.layer_norm: (8x784x384xf32, 8x784xf32, 8x784xf32) <- (8x784x384xf32, 384xf32, 384xf32) + layer_norm_21, layer_norm_22, layer_norm_23 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_17, parameter_267, parameter_266, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_266, parameter_267 + + # pd_op.matmul: (8x784x1536xf32) <- (8x784x384xf32, 384x1536xf32) + matmul_14 = paddle._C_ops.matmul(layer_norm_21, parameter_265, False, False) + del parameter_265 + + # pd_op.add: (8x784x1536xf32) <- (8x784x1536xf32, 1536xf32) + add_18 = paddle._C_ops.add(matmul_14, parameter_264) + del parameter_264 + + # pd_op.gelu: (8x784x1536xf32) <- (8x784x1536xf32) + gelu_2 = paddle._C_ops.gelu(add_18, False) + + # pd_op.matmul: (8x784x384xf32) <- (8x784x1536xf32, 1536x384xf32) + matmul_15 = paddle._C_ops.matmul(gelu_2, parameter_263, False, False) + del parameter_263 + + # pd_op.add: (8x784x384xf32) <- (8x784x384xf32, 384xf32) + add_19 = paddle._C_ops.add(matmul_15, parameter_262) + del parameter_262 + + # pd_op.uniform: (8x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_3 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (8x1x1xf32) <- (xf32, 8x1x1xf32) + add_175 = paddle._C_ops.add(full_3, uniform_3) + del uniform_3 + + # pd_op.floor: (8x1x1xf32) <- (8x1x1xf32) + floor_3 = paddle._C_ops.floor(add_175) + del add_175 + + # pd_op.divide: (8x784x384xf32) <- (8x784x384xf32, xf32) + divide_3 = paddle._C_ops.divide(add_19, full_3) + + # pd_op.multiply: (8x784x384xf32) <- (8x784x384xf32, 8x1x1xf32) + multiply_3 = paddle._C_ops.multiply(divide_3, floor_3) + + # pd_op.add: (8x784x384xf32) <- (8x784x384xf32, 8x784x384xf32) + add_20 = paddle._C_ops.add(add_17, multiply_3) + + # pd_op.layer_norm: (8x784x384xf32, 8x784xf32, 8x784xf32) <- (8x784x384xf32, 384xf32, 384xf32) + layer_norm_24, layer_norm_25, layer_norm_26 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_20, parameter_261, parameter_260, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_260, parameter_261 + + # pd_op.reshape: (8x28x28x384xf32) <- (8x784x384xf32, 4xi64) + reshape_28 = paddle._C_ops.reshape(layer_norm_24, full_int_array_42) + + # pd_op.roll: (8x28x28x384xf32) <- (8x28x28x384xf32, 2xi64) + roll_2 = paddle._C_ops.roll(reshape_28, full_int_array_4, [1, 2]) + + # pd_op.reshape: (8x4x7x4x7x384xf32) <- (8x28x28x384xf32, 6xi64) + reshape_228 = paddle._C_ops.reshape(roll_2, full_int_array_43) + del full_int_array_43 + + # pd_op.transpose: (8x4x4x7x7x384xf32) <- (8x4x7x4x7x384xf32) + transpose_19 = paddle._C_ops.transpose(reshape_228, [0, 1, 3, 2, 4, 5]) + del reshape_228 + + # pd_op.reshape: (128x7x7x384xf32) <- (8x4x4x7x7x384xf32, 4xi64) + reshape_29 = paddle._C_ops.reshape(transpose_19, full_int_array_44) + + # pd_op.reshape: (128x49x384xf32) <- (128x7x7x384xf32, 3xi64) + reshape_30 = paddle._C_ops.reshape(reshape_29, full_int_array_45) + del full_int_array_45 + + # pd_op.full: (1x28x28x1xf32) <- () + full_31 = paddle._C_ops.full( + [1, 28, 28, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__20 = paddle._C_ops.set_value_( + full_31, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_31 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__21 = paddle._C_ops.set_value_( + set_value__20, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__20 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__22 = paddle._C_ops.set_value_( + set_value__21, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__21 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__23 = paddle._C_ops.set_value_( + set_value__22, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__22 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__24 = paddle._C_ops.set_value_( + set_value__23, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__23 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__25 = paddle._C_ops.set_value_( + set_value__24, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__24 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__26 = paddle._C_ops.set_value_( + set_value__25, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__25 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__27 = paddle._C_ops.set_value_( + set_value__26, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__26 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__1 = paddle._C_ops.set_value_( + set_value__27, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__27 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_51 = [1, 4, 7, 4, 7, 1] + + # pd_op.reshape: (1x4x7x4x7x1xf32) <- (1x28x28x1xf32, 6xi64) + reshape_229 = paddle._C_ops.reshape(set_value__1, full_int_array_51) + del full_int_array_51 + + # pd_op.transpose: (1x4x4x7x7x1xf32) <- (1x4x7x4x7x1xf32) + transpose_147 = paddle._C_ops.transpose(reshape_229, [0, 1, 3, 2, 4, 5]) + del reshape_229 + + # pd_op.reshape: (16x7x7x1xf32) <- (1x4x4x7x7x1xf32, 4xi64) + reshape_230 = paddle._C_ops.reshape(transpose_147, full_int_array_36) + del transpose_147 + + # pd_op.reshape: (16x49xf32) <- (16x7x7x1xf32, 2xi64) + reshape_231 = paddle._C_ops.reshape(reshape_230, full_int_array_37) + del reshape_230 + + # pd_op.unsqueeze: (16x1x49xf32) <- (16x49xf32, 1xi64) + unsqueeze_40 = paddle._C_ops.unsqueeze(reshape_231, full_int_array_1) + + # pd_op.unsqueeze: (16x49x1xf32) <- (16x49xf32, 1xi64) + unsqueeze_41 = paddle._C_ops.unsqueeze(reshape_231, full_int_array_2) + del reshape_231 + + # pd_op.subtract: (16x49x49xf32) <- (16x1x49xf32, 16x49x1xf32) + subtract_1 = paddle._C_ops.subtract(unsqueeze_40, unsqueeze_41) + del unsqueeze_40, unsqueeze_41 + + # pd_op.not_equal: (16x49x49xb) <- (16x49x49xf32, xf32) + not_equal_1 = paddle._C_ops.not_equal(subtract_1, full_26) + + # pd_op.full: (16x49x49xf32) <- () + full_32 = paddle._C_ops.full( + [16, 49, 49], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (16x49x49xf32) <- (16x49x49xb, 16x49x49xf32, 16x49x49xf32) + where_2 = paddle._C_ops.where(not_equal_1, full_32, subtract_1) + del full_32, not_equal_1, subtract_1 + + # pd_op.equal: (16x49x49xb) <- (16x49x49xf32, xf32) + equal_1 = paddle._C_ops.equal(where_2, full_26) + + # pd_op.full: (16x49x49xf32) <- () + full_33 = paddle._C_ops.full( + [16, 49, 49], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (16x49x49xf32) <- (16x49x49xb, 16x49x49xf32, 16x49x49xf32) + where_3 = paddle._C_ops.where(equal_1, full_33, where_2) + del equal_1, full_33, where_2 + + # pd_op.matmul: (128x49x1152xf32) <- (128x49x384xf32, 384x1152xf32) + matmul_16 = paddle._C_ops.matmul(reshape_30, parameter_259, False, False) + del parameter_259 + + # pd_op.add: (128x49x1152xf32) <- (128x49x1152xf32, 1152xf32) + add_21 = paddle._C_ops.add(matmul_16, parameter_258) + del parameter_258 + + # pd_op.reshape: (128x49x3x12x32xf32) <- (128x49x1152xf32, 5xi64) + reshape_232 = paddle._C_ops.reshape(add_21, full_int_array_46) + del full_int_array_46 + + # pd_op.transpose: (3x128x12x49x32xf32) <- (128x49x3x12x32xf32) + transpose_20 = paddle._C_ops.transpose(reshape_232, [2, 0, 3, 1, 4]) + del reshape_232 + + # pd_op.slice: (128x12x49x32xf32) <- (3x128x12x49x32xf32, 1xi64, 1xi64) + slice_30 = paddle._C_ops.slice( + transpose_20, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (128x12x49x32xf32) <- (3x128x12x49x32xf32, 1xi64, 1xi64) + slice_31 = paddle._C_ops.slice( + transpose_20, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (128x12x49x32xf32) <- (3x128x12x49x32xf32, 1xi64, 1xi64) + slice_3 = paddle._C_ops.slice( + transpose_20, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (128x12x49x32xf32) <- (128x12x49x32xf32, 1xf32) + scale_3 = paddle._C_ops.scale(slice_30, full_0, float("0"), True) + del slice_30 + + # pd_op.transpose: (128x12x32x49xf32) <- (128x12x49x32xf32) + transpose_21 = paddle._C_ops.transpose(slice_31, [0, 1, 3, 2]) + del slice_31 + + # pd_op.matmul: (128x12x49x49xf32) <- (128x12x49x32xf32, 128x12x32x49xf32) + matmul_17 = paddle._C_ops.matmul(scale_3, transpose_21, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_31 = paddle._C_ops.reshape(data_25, full_int_array_15) + del data_25 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_3 = paddle._C_ops.index_select(data_26, reshape_31, 0) + del data_26 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_233 = paddle._C_ops.reshape(index_select_3, full_int_array_16) + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_22 = paddle._C_ops.transpose(reshape_233, [2, 0, 1]) + del reshape_233 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_4 = paddle._C_ops.unsqueeze(transpose_22, full_int_array_0) + + # pd_op.add: (128x12x49x49xf32) <- (128x12x49x49xf32, 1x12x49x49xf32) + add_22 = paddle._C_ops.add(matmul_17, unsqueeze_4) + + # pd_op.full_int_array: (5xi64) <- () + full_int_array_52 = [8, 16, 12, 49, 49] + + # pd_op.reshape: (8x16x12x49x49xf32) <- (128x12x49x49xf32, 5xi64) + reshape_32 = paddle._C_ops.reshape(add_22, full_int_array_52) + del full_int_array_52 + + # pd_op.unsqueeze: (16x1x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_42 = paddle._C_ops.unsqueeze(where_3, full_int_array_1) + del where_3 + + # pd_op.unsqueeze: (1x16x1x49x49xf32) <- (16x1x49x49xf32, 1xi64) + unsqueeze_5 = paddle._C_ops.unsqueeze(unsqueeze_42, full_int_array_0) + del unsqueeze_42 + + # pd_op.add: (8x16x12x49x49xf32) <- (8x16x12x49x49xf32, 1x16x1x49x49xf32) + add_23 = paddle._C_ops.add(reshape_32, unsqueeze_5) + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_53 = [128, 12, 49, 49] + + # pd_op.reshape: (128x12x49x49xf32) <- (8x16x12x49x49xf32, 4xi64) + reshape_234 = paddle._C_ops.reshape(add_23, full_int_array_53) + del full_int_array_53 + + # pd_op.softmax: (128x12x49x49xf32) <- (128x12x49x49xf32) + softmax_3 = paddle._C_ops.softmax(reshape_234, -1) + del reshape_234 + + # pd_op.matmul: (128x12x49x32xf32) <- (128x12x49x49xf32, 128x12x49x32xf32) + matmul_127 = paddle._C_ops.matmul(softmax_3, slice_3, False, False) + + # pd_op.transpose: (128x49x12x32xf32) <- (128x12x49x32xf32) + transpose_23 = paddle._C_ops.transpose(matmul_127, [0, 2, 1, 3]) + del matmul_127 + + # pd_op.reshape: (128x49x384xf32) <- (128x49x12x32xf32, 3xi64) + reshape_33 = paddle._C_ops.reshape(transpose_23, full_int_array_47) + del full_int_array_47 + + # pd_op.matmul: (128x49x384xf32) <- (128x49x384xf32, 384x384xf32) + matmul_18 = paddle._C_ops.matmul(reshape_33, parameter_257, False, False) + del parameter_257 + + # pd_op.add: (128x49x384xf32) <- (128x49x384xf32, 384xf32) + add_24 = paddle._C_ops.add(matmul_18, parameter_256) + del parameter_256 + + # pd_op.reshape: (128x7x7x384xf32) <- (128x49x384xf32, 4xi64) + reshape_34 = paddle._C_ops.reshape(add_24, full_int_array_44) + del full_int_array_44 + + # pd_op.reshape: (8x4x4x7x7x384xf32) <- (128x7x7x384xf32, 6xi64) + reshape_235 = paddle._C_ops.reshape(reshape_34, full_int_array_48) + del full_int_array_48 + + # pd_op.transpose: (8x4x7x4x7x384xf32) <- (8x4x4x7x7x384xf32) + transpose_24 = paddle._C_ops.transpose(reshape_235, [0, 1, 3, 2, 4, 5]) + del reshape_235 + + # pd_op.reshape: (8x28x28x384xf32) <- (8x4x7x4x7x384xf32, 4xi64) + reshape_35 = paddle._C_ops.reshape(transpose_24, full_int_array_49) + del full_int_array_49 + + # pd_op.roll: (8x28x28x384xf32) <- (8x28x28x384xf32, 2xi64) + roll_3 = paddle._C_ops.roll(reshape_35, full_int_array_5, [1, 2]) + + # pd_op.reshape: (8x784x384xf32) <- (8x28x28x384xf32, 3xi64) + reshape_36 = paddle._C_ops.reshape(roll_3, full_int_array_50) + del full_int_array_50 + + # pd_op.full: (xf32) <- () + full_4 = paddle._C_ops.full( + [], + float("0.986957"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_40 = full_4 + + # pd_op.uniform: (8x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_4 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (8x1x1xf32) <- (xf32, 8x1x1xf32) + add_176 = paddle._C_ops.add(full_4, uniform_4) + del uniform_4 + + # pd_op.floor: (8x1x1xf32) <- (8x1x1xf32) + floor_4 = paddle._C_ops.floor(add_176) + del add_176 + + # pd_op.divide: (8x784x384xf32) <- (8x784x384xf32, xf32) + divide_4 = paddle._C_ops.divide(reshape_36, full_4) + + # pd_op.multiply: (8x784x384xf32) <- (8x784x384xf32, 8x1x1xf32) + multiply_4 = paddle._C_ops.multiply(divide_4, floor_4) + + # pd_op.add: (8x784x384xf32) <- (8x784x384xf32, 8x784x384xf32) + add_25 = paddle._C_ops.add(add_20, multiply_4) + + # pd_op.layer_norm: (8x784x384xf32, 8x784xf32, 8x784xf32) <- (8x784x384xf32, 384xf32, 384xf32) + layer_norm_27, layer_norm_28, layer_norm_29 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_25, parameter_255, parameter_254, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_254, parameter_255 + + # pd_op.matmul: (8x784x1536xf32) <- (8x784x384xf32, 384x1536xf32) + matmul_19 = paddle._C_ops.matmul(layer_norm_27, parameter_253, False, False) + del parameter_253 + + # pd_op.add: (8x784x1536xf32) <- (8x784x1536xf32, 1536xf32) + add_26 = paddle._C_ops.add(matmul_19, parameter_252) + del parameter_252 + + # pd_op.gelu: (8x784x1536xf32) <- (8x784x1536xf32) + gelu_3 = paddle._C_ops.gelu(add_26, False) + + # pd_op.matmul: (8x784x384xf32) <- (8x784x1536xf32, 1536x384xf32) + matmul_20 = paddle._C_ops.matmul(gelu_3, parameter_251, False, False) + del parameter_251 + + # pd_op.add: (8x784x384xf32) <- (8x784x384xf32, 384xf32) + add_27 = paddle._C_ops.add(matmul_20, parameter_250) + del parameter_250 + + # pd_op.uniform: (8x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_5 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (8x1x1xf32) <- (xf32, 8x1x1xf32) + add_177 = paddle._C_ops.add(full_4, uniform_5) + del uniform_5 + + # pd_op.floor: (8x1x1xf32) <- (8x1x1xf32) + floor_5 = paddle._C_ops.floor(add_177) + del add_177 + + # pd_op.divide: (8x784x384xf32) <- (8x784x384xf32, xf32) + divide_5 = paddle._C_ops.divide(add_27, full_4) + + # pd_op.multiply: (8x784x384xf32) <- (8x784x384xf32, 8x1x1xf32) + multiply_5 = paddle._C_ops.multiply(divide_5, floor_5) + + # pd_op.add: (8x784x384xf32) <- (8x784x384xf32, 8x784x384xf32) + add_28 = paddle._C_ops.add(add_25, multiply_5) + + # pd_op.reshape: (8x28x28x384xf32) <- (8x784x384xf32, 4xi64) + reshape_37 = paddle._C_ops.reshape(add_28, full_int_array_42) + + # pd_op.strided_slice: (8x14x14x384xf32) <- (8x28x28x384xf32, 2xi64, 2xi64, 2xi64) + strided_slice_4 = paddle._C_ops.strided_slice( + reshape_37, [1, 2], full_int_array_21, full_int_array_34, full_int_array_6 + ) + + # pd_op.strided_slice: (8x14x14x384xf32) <- (8x28x28x384xf32, 2xi64, 2xi64, 2xi64) + strided_slice_5 = paddle._C_ops.strided_slice( + reshape_37, [1, 2], full_int_array_7, full_int_array_34, full_int_array_6 + ) + + # pd_op.strided_slice: (8x14x14x384xf32) <- (8x28x28x384xf32, 2xi64, 2xi64, 2xi64) + strided_slice_6 = paddle._C_ops.strided_slice( + reshape_37, [1, 2], full_int_array_8, full_int_array_34, full_int_array_6 + ) + + # pd_op.strided_slice: (8x14x14x384xf32) <- (8x28x28x384xf32, 2xi64, 2xi64, 2xi64) + strided_slice_7 = paddle._C_ops.strided_slice( + reshape_37, [1, 2], full_int_array_23, full_int_array_34, full_int_array_6 + ) + + # pd_op.reshape: (8x28x28x384xf32) <- (8x28x28x384xf32, 4xi64) + reshape_236 = paddle._C_ops.reshape(reshape_37, full_int_array_42) + del full_int_array_42 + + # builtin.combine: ([8x14x14x384xf32, 8x14x14x384xf32, 8x14x14x384xf32, 8x14x14x384xf32]) <- (8x14x14x384xf32, 8x14x14x384xf32, 8x14x14x384xf32, 8x14x14x384xf32) + combine_1 = [strided_slice_4, strided_slice_5, strided_slice_6, strided_slice_7] + + # pd_op.concat: (8x14x14x1536xf32) <- ([8x14x14x384xf32, 8x14x14x384xf32, 8x14x14x384xf32, 8x14x14x384xf32], 1xi32) + concat_1 = paddle._C_ops.concat(combine_1, full_2) + del combine_1 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_54 = [8, -1, 1536] + + # pd_op.reshape: (8x196x1536xf32) <- (8x14x14x1536xf32, 3xi64) + reshape_38 = paddle._C_ops.reshape(concat_1, full_int_array_54) + del full_int_array_54 + + # pd_op.layer_norm: (8x196x1536xf32, 8x196xf32, 8x196xf32) <- (8x196x1536xf32, 1536xf32, 1536xf32) + layer_norm_30, layer_norm_31, layer_norm_32 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + reshape_38, parameter_249, parameter_248, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_248, parameter_249 + + # pd_op.matmul: (8x196x768xf32) <- (8x196x1536xf32, 1536x768xf32) + matmul_21 = paddle._C_ops.matmul(layer_norm_30, parameter_247, False, False) + del parameter_247 + + # pd_op.layer_norm: (8x196x768xf32, 8x196xf32, 8x196xf32) <- (8x196x768xf32, 768xf32, 768xf32) + layer_norm_33, layer_norm_34, layer_norm_35 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + matmul_21, parameter_246, parameter_245, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_245, parameter_246 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_55 = [8, 14, 14, 768] + + # pd_op.reshape: (8x14x14x768xf32) <- (8x196x768xf32, 4xi64) + reshape_39 = paddle._C_ops.reshape(layer_norm_33, full_int_array_55) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_56 = [8, 2, 7, 2, 7, 768] + + # pd_op.reshape: (8x2x7x2x7x768xf32) <- (8x14x14x768xf32, 6xi64) + reshape_237 = paddle._C_ops.reshape(reshape_39, full_int_array_56) + + # pd_op.transpose: (8x2x2x7x7x768xf32) <- (8x2x7x2x7x768xf32) + transpose_25 = paddle._C_ops.transpose(reshape_237, [0, 1, 3, 2, 4, 5]) + del reshape_237 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_57 = [-1, 7, 7, 768] + + # pd_op.reshape: (32x7x7x768xf32) <- (8x2x2x7x7x768xf32, 4xi64) + reshape_40 = paddle._C_ops.reshape(transpose_25, full_int_array_57) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_58 = [-1, 49, 768] + + # pd_op.reshape: (32x49x768xf32) <- (32x7x7x768xf32, 3xi64) + reshape_41 = paddle._C_ops.reshape(reshape_40, full_int_array_58) + + # pd_op.matmul: (32x49x2304xf32) <- (32x49x768xf32, 768x2304xf32) + matmul_22 = paddle._C_ops.matmul(reshape_41, parameter_244, False, False) + del parameter_244 + + # pd_op.add: (32x49x2304xf32) <- (32x49x2304xf32, 2304xf32) + add_29 = paddle._C_ops.add(matmul_22, parameter_243) + del parameter_243 + + # pd_op.full_int_array: (5xi64) <- () + full_int_array_59 = [32, 49, 3, 24, 32] + + # pd_op.reshape: (32x49x3x24x32xf32) <- (32x49x2304xf32, 5xi64) + reshape_238 = paddle._C_ops.reshape(add_29, full_int_array_59) + + # pd_op.transpose: (3x32x24x49x32xf32) <- (32x49x3x24x32xf32) + transpose_26 = paddle._C_ops.transpose(reshape_238, [2, 0, 3, 1, 4]) + del reshape_238 + + # pd_op.slice: (32x24x49x32xf32) <- (3x32x24x49x32xf32, 1xi64, 1xi64) + slice_32 = paddle._C_ops.slice( + transpose_26, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (32x24x49x32xf32) <- (3x32x24x49x32xf32, 1xi64, 1xi64) + slice_33 = paddle._C_ops.slice( + transpose_26, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (32x24x49x32xf32) <- (3x32x24x49x32xf32, 1xi64, 1xi64) + slice_4 = paddle._C_ops.slice( + transpose_26, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (32x24x49x32xf32) <- (32x24x49x32xf32, 1xf32) + scale_4 = paddle._C_ops.scale(slice_32, full_0, float("0"), True) + del slice_32 + + # pd_op.transpose: (32x24x32x49xf32) <- (32x24x49x32xf32) + transpose_27 = paddle._C_ops.transpose(slice_33, [0, 1, 3, 2]) + del slice_33 + + # pd_op.matmul: (32x24x49x49xf32) <- (32x24x49x32xf32, 32x24x32x49xf32) + matmul_23 = paddle._C_ops.matmul(scale_4, transpose_27, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_42 = paddle._C_ops.reshape(data_27, full_int_array_15) + del data_27 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_4 = paddle._C_ops.index_select(data_28, reshape_42, 0) + del data_28 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_239 = paddle._C_ops.reshape(index_select_4, full_int_array_16) + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_28 = paddle._C_ops.transpose(reshape_239, [2, 0, 1]) + del reshape_239 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_6 = paddle._C_ops.unsqueeze(transpose_28, full_int_array_0) + + # pd_op.add: (32x24x49x49xf32) <- (32x24x49x49xf32, 1x24x49x49xf32) + add_178 = paddle._C_ops.add(matmul_23, unsqueeze_6) + + # pd_op.softmax: (32x24x49x49xf32) <- (32x24x49x49xf32) + softmax_4 = paddle._C_ops.softmax(add_178, -1) + del add_178 + + # pd_op.matmul: (32x24x49x32xf32) <- (32x24x49x49xf32, 32x24x49x32xf32) + matmul_128 = paddle._C_ops.matmul(softmax_4, slice_4, False, False) + + # pd_op.transpose: (32x49x24x32xf32) <- (32x24x49x32xf32) + transpose_29 = paddle._C_ops.transpose(matmul_128, [0, 2, 1, 3]) + del matmul_128 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_60 = [32, 49, 768] + + # pd_op.reshape: (32x49x768xf32) <- (32x49x24x32xf32, 3xi64) + reshape_43 = paddle._C_ops.reshape(transpose_29, full_int_array_60) + + # pd_op.matmul: (32x49x768xf32) <- (32x49x768xf32, 768x768xf32) + matmul_24 = paddle._C_ops.matmul(reshape_43, parameter_242, False, False) + del parameter_242 + + # pd_op.add: (32x49x768xf32) <- (32x49x768xf32, 768xf32) + add_30 = paddle._C_ops.add(matmul_24, parameter_241) + del parameter_241 + + # pd_op.reshape: (32x7x7x768xf32) <- (32x49x768xf32, 4xi64) + reshape_44 = paddle._C_ops.reshape(add_30, full_int_array_57) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_61 = [-1, 2, 2, 7, 7, 768] + + # pd_op.reshape: (8x2x2x7x7x768xf32) <- (32x7x7x768xf32, 6xi64) + reshape_240 = paddle._C_ops.reshape(reshape_44, full_int_array_61) + + # pd_op.transpose: (8x2x7x2x7x768xf32) <- (8x2x2x7x7x768xf32) + transpose_30 = paddle._C_ops.transpose(reshape_240, [0, 1, 3, 2, 4, 5]) + del reshape_240 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_62 = [-1, 14, 14, 768] + + # pd_op.reshape: (8x14x14x768xf32) <- (8x2x7x2x7x768xf32, 4xi64) + reshape_45 = paddle._C_ops.reshape(transpose_30, full_int_array_62) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_63 = [8, 196, 768] + + # pd_op.reshape: (8x196x768xf32) <- (8x14x14x768xf32, 3xi64) + reshape_46 = paddle._C_ops.reshape(reshape_45, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_5 = paddle._C_ops.full( + [], + float("0.982609"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_62 = full_5 + + # pd_op.uniform: (8x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_6 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (8x1x1xf32) <- (xf32, 8x1x1xf32) + add_179 = paddle._C_ops.add(full_5, uniform_6) + del uniform_6 + + # pd_op.floor: (8x1x1xf32) <- (8x1x1xf32) + floor_6 = paddle._C_ops.floor(add_179) + del add_179 + + # pd_op.divide: (8x196x768xf32) <- (8x196x768xf32, xf32) + divide_6 = paddle._C_ops.divide(reshape_46, full_5) + + # pd_op.multiply: (8x196x768xf32) <- (8x196x768xf32, 8x1x1xf32) + multiply_6 = paddle._C_ops.multiply(divide_6, floor_6) + + # pd_op.add: (8x196x768xf32) <- (8x196x768xf32, 8x196x768xf32) + add_31 = paddle._C_ops.add(matmul_21, multiply_6) + + # pd_op.layer_norm: (8x196x768xf32, 8x196xf32, 8x196xf32) <- (8x196x768xf32, 768xf32, 768xf32) + layer_norm_36, layer_norm_37, layer_norm_38 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_31, parameter_240, parameter_239, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_239, parameter_240 + + # pd_op.matmul: (8x196x3072xf32) <- (8x196x768xf32, 768x3072xf32) + matmul_25 = paddle._C_ops.matmul(layer_norm_36, parameter_238, False, False) + del parameter_238 + + # pd_op.add: (8x196x3072xf32) <- (8x196x3072xf32, 3072xf32) + add_32 = paddle._C_ops.add(matmul_25, parameter_237) + del parameter_237 + + # pd_op.gelu: (8x196x3072xf32) <- (8x196x3072xf32) + gelu_4 = paddle._C_ops.gelu(add_32, False) + + # pd_op.matmul: (8x196x768xf32) <- (8x196x3072xf32, 3072x768xf32) + matmul_26 = paddle._C_ops.matmul(gelu_4, parameter_236, False, False) + del parameter_236 + + # pd_op.add: (8x196x768xf32) <- (8x196x768xf32, 768xf32) + add_33 = paddle._C_ops.add(matmul_26, parameter_235) + del parameter_235 + + # pd_op.uniform: (8x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_7 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (8x1x1xf32) <- (xf32, 8x1x1xf32) + add_180 = paddle._C_ops.add(full_5, uniform_7) + del uniform_7 + + # pd_op.floor: (8x1x1xf32) <- (8x1x1xf32) + floor_7 = paddle._C_ops.floor(add_180) + del add_180 + + # pd_op.divide: (8x196x768xf32) <- (8x196x768xf32, xf32) + divide_7 = paddle._C_ops.divide(add_33, full_5) + + # pd_op.multiply: (8x196x768xf32) <- (8x196x768xf32, 8x1x1xf32) + multiply_7 = paddle._C_ops.multiply(divide_7, floor_7) + + # pd_op.add: (8x196x768xf32) <- (8x196x768xf32, 8x196x768xf32) + add_34 = paddle._C_ops.add(add_31, multiply_7) + + # pd_op.layer_norm: (8x196x768xf32, 8x196xf32, 8x196xf32) <- (8x196x768xf32, 768xf32, 768xf32) + layer_norm_39, layer_norm_40, layer_norm_41 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_34, parameter_234, parameter_233, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_233, parameter_234 + + # pd_op.reshape: (8x14x14x768xf32) <- (8x196x768xf32, 4xi64) + reshape_47 = paddle._C_ops.reshape(layer_norm_39, full_int_array_55) + + # pd_op.roll: (8x14x14x768xf32) <- (8x14x14x768xf32, 2xi64) + roll_4 = paddle._C_ops.roll(reshape_47, full_int_array_4, [1, 2]) + + # pd_op.reshape: (8x2x7x2x7x768xf32) <- (8x14x14x768xf32, 6xi64) + reshape_241 = paddle._C_ops.reshape(roll_4, full_int_array_56) + + # pd_op.transpose: (8x2x2x7x7x768xf32) <- (8x2x7x2x7x768xf32) + transpose_31 = paddle._C_ops.transpose(reshape_241, [0, 1, 3, 2, 4, 5]) + del reshape_241 + + # pd_op.reshape: (32x7x7x768xf32) <- (8x2x2x7x7x768xf32, 4xi64) + reshape_48 = paddle._C_ops.reshape(transpose_31, full_int_array_57) + + # pd_op.reshape: (32x49x768xf32) <- (32x7x7x768xf32, 3xi64) + reshape_49 = paddle._C_ops.reshape(reshape_48, full_int_array_58) + + # pd_op.full: (1x14x14x1xf32) <- () + full_34 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__28 = paddle._C_ops.set_value_( + full_34, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_34 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__29 = paddle._C_ops.set_value_( + set_value__28, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__28 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__30 = paddle._C_ops.set_value_( + set_value__29, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__29 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__31 = paddle._C_ops.set_value_( + set_value__30, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__30 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__32 = paddle._C_ops.set_value_( + set_value__31, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__31 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__33 = paddle._C_ops.set_value_( + set_value__32, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__32 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__34 = paddle._C_ops.set_value_( + set_value__33, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__33 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__35 = paddle._C_ops.set_value_( + set_value__34, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__34 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__2 = paddle._C_ops.set_value_( + set_value__35, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__35 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_64 = [1, 2, 7, 2, 7, 1] + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_242 = paddle._C_ops.reshape(set_value__2, full_int_array_64) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_148 = paddle._C_ops.transpose(reshape_242, [0, 1, 3, 2, 4, 5]) + del reshape_242 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_243 = paddle._C_ops.reshape(transpose_148, full_int_array_36) + del transpose_148 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_244 = paddle._C_ops.reshape(reshape_243, full_int_array_37) + del reshape_243 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_43 = paddle._C_ops.unsqueeze(reshape_244, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_44 = paddle._C_ops.unsqueeze(reshape_244, full_int_array_2) + del reshape_244 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_2 = paddle._C_ops.subtract(unsqueeze_43, unsqueeze_44) + del unsqueeze_43, unsqueeze_44 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_2 = paddle._C_ops.not_equal(subtract_2, full_26) + + # pd_op.full: (4x49x49xf32) <- () + full_35 = paddle._C_ops.full( + [4, 49, 49], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_4 = paddle._C_ops.where(not_equal_2, full_35, subtract_2) + del not_equal_2, subtract_2 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_2 = paddle._C_ops.equal(where_4, full_26) + + # pd_op.full: (4x49x49xf32) <- () + full_36 = paddle._C_ops.full( + [4, 49, 49], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_5 = paddle._C_ops.where(equal_2, full_36, where_4) + del equal_2, where_4 + + # pd_op.matmul: (32x49x2304xf32) <- (32x49x768xf32, 768x2304xf32) + matmul_27 = paddle._C_ops.matmul(reshape_49, parameter_232, False, False) + del parameter_232 + + # pd_op.add: (32x49x2304xf32) <- (32x49x2304xf32, 2304xf32) + add_35 = paddle._C_ops.add(matmul_27, parameter_231) + del parameter_231 + + # pd_op.reshape: (32x49x3x24x32xf32) <- (32x49x2304xf32, 5xi64) + reshape_245 = paddle._C_ops.reshape(add_35, full_int_array_59) + + # pd_op.transpose: (3x32x24x49x32xf32) <- (32x49x3x24x32xf32) + transpose_32 = paddle._C_ops.transpose(reshape_245, [2, 0, 3, 1, 4]) + del reshape_245 + + # pd_op.slice: (32x24x49x32xf32) <- (3x32x24x49x32xf32, 1xi64, 1xi64) + slice_34 = paddle._C_ops.slice( + transpose_32, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (32x24x49x32xf32) <- (3x32x24x49x32xf32, 1xi64, 1xi64) + slice_35 = paddle._C_ops.slice( + transpose_32, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (32x24x49x32xf32) <- (3x32x24x49x32xf32, 1xi64, 1xi64) + slice_5 = paddle._C_ops.slice( + transpose_32, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (32x24x49x32xf32) <- (32x24x49x32xf32, 1xf32) + scale_5 = paddle._C_ops.scale(slice_34, full_0, float("0"), True) + del slice_34 + + # pd_op.transpose: (32x24x32x49xf32) <- (32x24x49x32xf32) + transpose_33 = paddle._C_ops.transpose(slice_35, [0, 1, 3, 2]) + del slice_35 + + # pd_op.matmul: (32x24x49x49xf32) <- (32x24x49x32xf32, 32x24x32x49xf32) + matmul_28 = paddle._C_ops.matmul(scale_5, transpose_33, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_50 = paddle._C_ops.reshape(data_29, full_int_array_15) + del data_29 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_5 = paddle._C_ops.index_select(data_30, reshape_50, 0) + del data_30 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_246 = paddle._C_ops.reshape(index_select_5, full_int_array_16) + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_34 = paddle._C_ops.transpose(reshape_246, [2, 0, 1]) + del reshape_246 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_7 = paddle._C_ops.unsqueeze(transpose_34, full_int_array_0) + + # pd_op.add: (32x24x49x49xf32) <- (32x24x49x49xf32, 1x24x49x49xf32) + add_36 = paddle._C_ops.add(matmul_28, unsqueeze_7) + + # pd_op.full_int_array: (5xi64) <- () + full_int_array_65 = [8, 4, 24, 49, 49] + + # pd_op.reshape: (8x4x24x49x49xf32) <- (32x24x49x49xf32, 5xi64) + reshape_51 = paddle._C_ops.reshape(add_36, full_int_array_65) + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_45 = paddle._C_ops.unsqueeze(where_5, full_int_array_1) + del where_5 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_8 = paddle._C_ops.unsqueeze(unsqueeze_45, full_int_array_0) + del unsqueeze_45 + + # pd_op.add: (8x4x24x49x49xf32) <- (8x4x24x49x49xf32, 1x4x1x49x49xf32) + add_37 = paddle._C_ops.add(reshape_51, unsqueeze_8) + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_66 = [32, 24, 49, 49] + + # pd_op.reshape: (32x24x49x49xf32) <- (8x4x24x49x49xf32, 4xi64) + reshape_247 = paddle._C_ops.reshape(add_37, full_int_array_66) + + # pd_op.softmax: (32x24x49x49xf32) <- (32x24x49x49xf32) + softmax_5 = paddle._C_ops.softmax(reshape_247, -1) + del reshape_247 + + # pd_op.matmul: (32x24x49x32xf32) <- (32x24x49x49xf32, 32x24x49x32xf32) + matmul_129 = paddle._C_ops.matmul(softmax_5, slice_5, False, False) + + # pd_op.transpose: (32x49x24x32xf32) <- (32x24x49x32xf32) + transpose_35 = paddle._C_ops.transpose(matmul_129, [0, 2, 1, 3]) + del matmul_129 + + # pd_op.reshape: (32x49x768xf32) <- (32x49x24x32xf32, 3xi64) + reshape_52 = paddle._C_ops.reshape(transpose_35, full_int_array_60) + + # pd_op.matmul: (32x49x768xf32) <- (32x49x768xf32, 768x768xf32) + matmul_29 = paddle._C_ops.matmul(reshape_52, parameter_230, False, False) + del parameter_230 + + # pd_op.add: (32x49x768xf32) <- (32x49x768xf32, 768xf32) + add_38 = paddle._C_ops.add(matmul_29, parameter_229) + del parameter_229 + + # pd_op.reshape: (32x7x7x768xf32) <- (32x49x768xf32, 4xi64) + reshape_53 = paddle._C_ops.reshape(add_38, full_int_array_57) + + # pd_op.reshape: (8x2x2x7x7x768xf32) <- (32x7x7x768xf32, 6xi64) + reshape_248 = paddle._C_ops.reshape(reshape_53, full_int_array_61) + + # pd_op.transpose: (8x2x7x2x7x768xf32) <- (8x2x2x7x7x768xf32) + transpose_36 = paddle._C_ops.transpose(reshape_248, [0, 1, 3, 2, 4, 5]) + del reshape_248 + + # pd_op.reshape: (8x14x14x768xf32) <- (8x2x7x2x7x768xf32, 4xi64) + reshape_54 = paddle._C_ops.reshape(transpose_36, full_int_array_62) + + # pd_op.roll: (8x14x14x768xf32) <- (8x14x14x768xf32, 2xi64) + roll_5 = paddle._C_ops.roll(reshape_54, full_int_array_5, [1, 2]) + + # pd_op.reshape: (8x196x768xf32) <- (8x14x14x768xf32, 3xi64) + reshape_55 = paddle._C_ops.reshape(roll_5, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_6 = paddle._C_ops.full( + [], + float("0.978261"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_73 = full_6 + + # pd_op.uniform: (8x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_8 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (8x1x1xf32) <- (xf32, 8x1x1xf32) + add_181 = paddle._C_ops.add(full_6, uniform_8) + del uniform_8 + + # pd_op.floor: (8x1x1xf32) <- (8x1x1xf32) + floor_8 = paddle._C_ops.floor(add_181) + del add_181 + + # pd_op.divide: (8x196x768xf32) <- (8x196x768xf32, xf32) + divide_8 = paddle._C_ops.divide(reshape_55, full_6) + + # pd_op.multiply: (8x196x768xf32) <- (8x196x768xf32, 8x1x1xf32) + multiply_8 = paddle._C_ops.multiply(divide_8, floor_8) + + # pd_op.add: (8x196x768xf32) <- (8x196x768xf32, 8x196x768xf32) + add_39 = paddle._C_ops.add(add_34, multiply_8) + + # pd_op.layer_norm: (8x196x768xf32, 8x196xf32, 8x196xf32) <- (8x196x768xf32, 768xf32, 768xf32) + layer_norm_42, layer_norm_43, layer_norm_44 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_39, parameter_228, parameter_227, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_227, parameter_228 + + # pd_op.matmul: (8x196x3072xf32) <- (8x196x768xf32, 768x3072xf32) + matmul_30 = paddle._C_ops.matmul(layer_norm_42, parameter_226, False, False) + del parameter_226 + + # pd_op.add: (8x196x3072xf32) <- (8x196x3072xf32, 3072xf32) + add_40 = paddle._C_ops.add(matmul_30, parameter_225) + del parameter_225 + + # pd_op.gelu: (8x196x3072xf32) <- (8x196x3072xf32) + gelu_5 = paddle._C_ops.gelu(add_40, False) + + # pd_op.matmul: (8x196x768xf32) <- (8x196x3072xf32, 3072x768xf32) + matmul_31 = paddle._C_ops.matmul(gelu_5, parameter_224, False, False) + del parameter_224 + + # pd_op.add: (8x196x768xf32) <- (8x196x768xf32, 768xf32) + add_41 = paddle._C_ops.add(matmul_31, parameter_223) + del parameter_223 + + # pd_op.uniform: (8x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_9 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (8x1x1xf32) <- (xf32, 8x1x1xf32) + add_182 = paddle._C_ops.add(full_6, uniform_9) + del uniform_9 + + # pd_op.floor: (8x1x1xf32) <- (8x1x1xf32) + floor_9 = paddle._C_ops.floor(add_182) + del add_182 + + # pd_op.divide: (8x196x768xf32) <- (8x196x768xf32, xf32) + divide_9 = paddle._C_ops.divide(add_41, full_6) + + # pd_op.multiply: (8x196x768xf32) <- (8x196x768xf32, 8x1x1xf32) + multiply_9 = paddle._C_ops.multiply(divide_9, floor_9) + + # pd_op.add: (8x196x768xf32) <- (8x196x768xf32, 8x196x768xf32) + add_42 = paddle._C_ops.add(add_39, multiply_9) + + # pd_op.layer_norm: (8x196x768xf32, 8x196xf32, 8x196xf32) <- (8x196x768xf32, 768xf32, 768xf32) + layer_norm_45, layer_norm_46, layer_norm_47 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_42, parameter_222, parameter_221, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_221, parameter_222 + + # pd_op.reshape: (8x14x14x768xf32) <- (8x196x768xf32, 4xi64) + reshape_56 = paddle._C_ops.reshape(layer_norm_45, full_int_array_55) + + # pd_op.reshape: (8x2x7x2x7x768xf32) <- (8x14x14x768xf32, 6xi64) + reshape_249 = paddle._C_ops.reshape(reshape_56, full_int_array_56) + + # pd_op.transpose: (8x2x2x7x7x768xf32) <- (8x2x7x2x7x768xf32) + transpose_37 = paddle._C_ops.transpose(reshape_249, [0, 1, 3, 2, 4, 5]) + del reshape_249 + + # pd_op.reshape: (32x7x7x768xf32) <- (8x2x2x7x7x768xf32, 4xi64) + reshape_57 = paddle._C_ops.reshape(transpose_37, full_int_array_57) + + # pd_op.reshape: (32x49x768xf32) <- (32x7x7x768xf32, 3xi64) + reshape_58 = paddle._C_ops.reshape(reshape_57, full_int_array_58) + + # pd_op.matmul: (32x49x2304xf32) <- (32x49x768xf32, 768x2304xf32) + matmul_32 = paddle._C_ops.matmul(reshape_58, parameter_220, False, False) + del parameter_220 + + # pd_op.add: (32x49x2304xf32) <- (32x49x2304xf32, 2304xf32) + add_43 = paddle._C_ops.add(matmul_32, parameter_219) + del parameter_219 + + # pd_op.reshape: (32x49x3x24x32xf32) <- (32x49x2304xf32, 5xi64) + reshape_250 = paddle._C_ops.reshape(add_43, full_int_array_59) + + # pd_op.transpose: (3x32x24x49x32xf32) <- (32x49x3x24x32xf32) + transpose_38 = paddle._C_ops.transpose(reshape_250, [2, 0, 3, 1, 4]) + del reshape_250 + + # pd_op.slice: (32x24x49x32xf32) <- (3x32x24x49x32xf32, 1xi64, 1xi64) + slice_36 = paddle._C_ops.slice( + transpose_38, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (32x24x49x32xf32) <- (3x32x24x49x32xf32, 1xi64, 1xi64) + slice_37 = paddle._C_ops.slice( + transpose_38, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (32x24x49x32xf32) <- (3x32x24x49x32xf32, 1xi64, 1xi64) + slice_6 = paddle._C_ops.slice( + transpose_38, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (32x24x49x32xf32) <- (32x24x49x32xf32, 1xf32) + scale_6 = paddle._C_ops.scale(slice_36, full_0, float("0"), True) + del slice_36 + + # pd_op.transpose: (32x24x32x49xf32) <- (32x24x49x32xf32) + transpose_39 = paddle._C_ops.transpose(slice_37, [0, 1, 3, 2]) + del slice_37 + + # pd_op.matmul: (32x24x49x49xf32) <- (32x24x49x32xf32, 32x24x32x49xf32) + matmul_33 = paddle._C_ops.matmul(scale_6, transpose_39, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_59 = paddle._C_ops.reshape(data_31, full_int_array_15) + del data_31 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_6 = paddle._C_ops.index_select(data_32, reshape_59, 0) + del data_32 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_251 = paddle._C_ops.reshape(index_select_6, full_int_array_16) + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_40 = paddle._C_ops.transpose(reshape_251, [2, 0, 1]) + del reshape_251 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_9 = paddle._C_ops.unsqueeze(transpose_40, full_int_array_0) + + # pd_op.add: (32x24x49x49xf32) <- (32x24x49x49xf32, 1x24x49x49xf32) + add_183 = paddle._C_ops.add(matmul_33, unsqueeze_9) + + # pd_op.softmax: (32x24x49x49xf32) <- (32x24x49x49xf32) + softmax_6 = paddle._C_ops.softmax(add_183, -1) + del add_183 + + # pd_op.matmul: (32x24x49x32xf32) <- (32x24x49x49xf32, 32x24x49x32xf32) + matmul_130 = paddle._C_ops.matmul(softmax_6, slice_6, False, False) + + # pd_op.transpose: (32x49x24x32xf32) <- (32x24x49x32xf32) + transpose_41 = paddle._C_ops.transpose(matmul_130, [0, 2, 1, 3]) + del matmul_130 + + # pd_op.reshape: (32x49x768xf32) <- (32x49x24x32xf32, 3xi64) + reshape_60 = paddle._C_ops.reshape(transpose_41, full_int_array_60) + + # pd_op.matmul: (32x49x768xf32) <- (32x49x768xf32, 768x768xf32) + matmul_34 = paddle._C_ops.matmul(reshape_60, parameter_218, False, False) + del parameter_218 + + # pd_op.add: (32x49x768xf32) <- (32x49x768xf32, 768xf32) + add_44 = paddle._C_ops.add(matmul_34, parameter_217) + del parameter_217 + + # pd_op.reshape: (32x7x7x768xf32) <- (32x49x768xf32, 4xi64) + reshape_61 = paddle._C_ops.reshape(add_44, full_int_array_57) + + # pd_op.reshape: (8x2x2x7x7x768xf32) <- (32x7x7x768xf32, 6xi64) + reshape_252 = paddle._C_ops.reshape(reshape_61, full_int_array_61) + + # pd_op.transpose: (8x2x7x2x7x768xf32) <- (8x2x2x7x7x768xf32) + transpose_42 = paddle._C_ops.transpose(reshape_252, [0, 1, 3, 2, 4, 5]) + del reshape_252 + + # pd_op.reshape: (8x14x14x768xf32) <- (8x2x7x2x7x768xf32, 4xi64) + reshape_62 = paddle._C_ops.reshape(transpose_42, full_int_array_62) + + # pd_op.reshape: (8x196x768xf32) <- (8x14x14x768xf32, 3xi64) + reshape_63 = paddle._C_ops.reshape(reshape_62, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_7 = paddle._C_ops.full( + [], + float("0.973913"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_82 = full_7 + + # pd_op.uniform: (8x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_10 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (8x1x1xf32) <- (xf32, 8x1x1xf32) + add_184 = paddle._C_ops.add(full_7, uniform_10) + del uniform_10 + + # pd_op.floor: (8x1x1xf32) <- (8x1x1xf32) + floor_10 = paddle._C_ops.floor(add_184) + del add_184 + + # pd_op.divide: (8x196x768xf32) <- (8x196x768xf32, xf32) + divide_10 = paddle._C_ops.divide(reshape_63, full_7) + + # pd_op.multiply: (8x196x768xf32) <- (8x196x768xf32, 8x1x1xf32) + multiply_10 = paddle._C_ops.multiply(divide_10, floor_10) + + # pd_op.add: (8x196x768xf32) <- (8x196x768xf32, 8x196x768xf32) + add_45 = paddle._C_ops.add(add_42, multiply_10) + + # pd_op.layer_norm: (8x196x768xf32, 8x196xf32, 8x196xf32) <- (8x196x768xf32, 768xf32, 768xf32) + layer_norm_48, layer_norm_49, layer_norm_50 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_45, parameter_216, parameter_215, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_215, parameter_216 + + # pd_op.matmul: (8x196x3072xf32) <- (8x196x768xf32, 768x3072xf32) + matmul_35 = paddle._C_ops.matmul(layer_norm_48, parameter_214, False, False) + del parameter_214 + + # pd_op.add: (8x196x3072xf32) <- (8x196x3072xf32, 3072xf32) + add_46 = paddle._C_ops.add(matmul_35, parameter_213) + del parameter_213 + + # pd_op.gelu: (8x196x3072xf32) <- (8x196x3072xf32) + gelu_6 = paddle._C_ops.gelu(add_46, False) + + # pd_op.matmul: (8x196x768xf32) <- (8x196x3072xf32, 3072x768xf32) + matmul_36 = paddle._C_ops.matmul(gelu_6, parameter_212, False, False) + del parameter_212 + + # pd_op.add: (8x196x768xf32) <- (8x196x768xf32, 768xf32) + add_47 = paddle._C_ops.add(matmul_36, parameter_211) + del parameter_211 + + # pd_op.uniform: (8x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_11 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (8x1x1xf32) <- (xf32, 8x1x1xf32) + add_185 = paddle._C_ops.add(full_7, uniform_11) + del uniform_11 + + # pd_op.floor: (8x1x1xf32) <- (8x1x1xf32) + floor_11 = paddle._C_ops.floor(add_185) + del add_185 + + # pd_op.divide: (8x196x768xf32) <- (8x196x768xf32, xf32) + divide_11 = paddle._C_ops.divide(add_47, full_7) + + # pd_op.multiply: (8x196x768xf32) <- (8x196x768xf32, 8x1x1xf32) + multiply_11 = paddle._C_ops.multiply(divide_11, floor_11) + + # pd_op.add: (8x196x768xf32) <- (8x196x768xf32, 8x196x768xf32) + add_48 = paddle._C_ops.add(add_45, multiply_11) + + # pd_op.layer_norm: (8x196x768xf32, 8x196xf32, 8x196xf32) <- (8x196x768xf32, 768xf32, 768xf32) + layer_norm_51, layer_norm_52, layer_norm_53 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_48, parameter_210, parameter_209, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_209, parameter_210 + + # pd_op.reshape: (8x14x14x768xf32) <- (8x196x768xf32, 4xi64) + reshape_64 = paddle._C_ops.reshape(layer_norm_51, full_int_array_55) + + # pd_op.roll: (8x14x14x768xf32) <- (8x14x14x768xf32, 2xi64) + roll_6 = paddle._C_ops.roll(reshape_64, full_int_array_4, [1, 2]) + + # pd_op.reshape: (8x2x7x2x7x768xf32) <- (8x14x14x768xf32, 6xi64) + reshape_253 = paddle._C_ops.reshape(roll_6, full_int_array_56) + + # pd_op.transpose: (8x2x2x7x7x768xf32) <- (8x2x7x2x7x768xf32) + transpose_43 = paddle._C_ops.transpose(reshape_253, [0, 1, 3, 2, 4, 5]) + del reshape_253 + + # pd_op.reshape: (32x7x7x768xf32) <- (8x2x2x7x7x768xf32, 4xi64) + reshape_65 = paddle._C_ops.reshape(transpose_43, full_int_array_57) + + # pd_op.reshape: (32x49x768xf32) <- (32x7x7x768xf32, 3xi64) + reshape_66 = paddle._C_ops.reshape(reshape_65, full_int_array_58) + + # pd_op.full: (1x14x14x1xf32) <- () + full_37 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__36 = paddle._C_ops.set_value_( + full_37, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_37 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__37 = paddle._C_ops.set_value_( + set_value__36, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__36 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__38 = paddle._C_ops.set_value_( + set_value__37, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__37 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__39 = paddle._C_ops.set_value_( + set_value__38, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__38 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__40 = paddle._C_ops.set_value_( + set_value__39, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__39 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__41 = paddle._C_ops.set_value_( + set_value__40, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__40 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__42 = paddle._C_ops.set_value_( + set_value__41, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__41 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__43 = paddle._C_ops.set_value_( + set_value__42, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__42 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__3 = paddle._C_ops.set_value_( + set_value__43, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__43 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_254 = paddle._C_ops.reshape(set_value__3, full_int_array_64) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_149 = paddle._C_ops.transpose(reshape_254, [0, 1, 3, 2, 4, 5]) + del reshape_254 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_255 = paddle._C_ops.reshape(transpose_149, full_int_array_36) + del transpose_149 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_256 = paddle._C_ops.reshape(reshape_255, full_int_array_37) + del reshape_255 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_46 = paddle._C_ops.unsqueeze(reshape_256, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_47 = paddle._C_ops.unsqueeze(reshape_256, full_int_array_2) + del reshape_256 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_3 = paddle._C_ops.subtract(unsqueeze_46, unsqueeze_47) + del unsqueeze_46, unsqueeze_47 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_3 = paddle._C_ops.not_equal(subtract_3, full_26) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_6 = paddle._C_ops.where(not_equal_3, full_35, subtract_3) + del not_equal_3, subtract_3 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_3 = paddle._C_ops.equal(where_6, full_26) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_7 = paddle._C_ops.where(equal_3, full_36, where_6) + del equal_3, where_6 + + # pd_op.matmul: (32x49x2304xf32) <- (32x49x768xf32, 768x2304xf32) + matmul_37 = paddle._C_ops.matmul(reshape_66, parameter_208, False, False) + del parameter_208 + + # pd_op.add: (32x49x2304xf32) <- (32x49x2304xf32, 2304xf32) + add_49 = paddle._C_ops.add(matmul_37, parameter_207) + del parameter_207 + + # pd_op.reshape: (32x49x3x24x32xf32) <- (32x49x2304xf32, 5xi64) + reshape_257 = paddle._C_ops.reshape(add_49, full_int_array_59) + + # pd_op.transpose: (3x32x24x49x32xf32) <- (32x49x3x24x32xf32) + transpose_44 = paddle._C_ops.transpose(reshape_257, [2, 0, 3, 1, 4]) + del reshape_257 + + # pd_op.slice: (32x24x49x32xf32) <- (3x32x24x49x32xf32, 1xi64, 1xi64) + slice_38 = paddle._C_ops.slice( + transpose_44, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (32x24x49x32xf32) <- (3x32x24x49x32xf32, 1xi64, 1xi64) + slice_39 = paddle._C_ops.slice( + transpose_44, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (32x24x49x32xf32) <- (3x32x24x49x32xf32, 1xi64, 1xi64) + slice_7 = paddle._C_ops.slice( + transpose_44, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (32x24x49x32xf32) <- (32x24x49x32xf32, 1xf32) + scale_7 = paddle._C_ops.scale(slice_38, full_0, float("0"), True) + del slice_38 + + # pd_op.transpose: (32x24x32x49xf32) <- (32x24x49x32xf32) + transpose_45 = paddle._C_ops.transpose(slice_39, [0, 1, 3, 2]) + del slice_39 + + # pd_op.matmul: (32x24x49x49xf32) <- (32x24x49x32xf32, 32x24x32x49xf32) + matmul_38 = paddle._C_ops.matmul(scale_7, transpose_45, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_67 = paddle._C_ops.reshape(data_33, full_int_array_15) + del data_33 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_7 = paddle._C_ops.index_select(data_34, reshape_67, 0) + del data_34 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_258 = paddle._C_ops.reshape(index_select_7, full_int_array_16) + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_46 = paddle._C_ops.transpose(reshape_258, [2, 0, 1]) + del reshape_258 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_10 = paddle._C_ops.unsqueeze(transpose_46, full_int_array_0) + + # pd_op.add: (32x24x49x49xf32) <- (32x24x49x49xf32, 1x24x49x49xf32) + add_50 = paddle._C_ops.add(matmul_38, unsqueeze_10) + + # pd_op.reshape: (8x4x24x49x49xf32) <- (32x24x49x49xf32, 5xi64) + reshape_68 = paddle._C_ops.reshape(add_50, full_int_array_65) + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_48 = paddle._C_ops.unsqueeze(where_7, full_int_array_1) + del where_7 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_11 = paddle._C_ops.unsqueeze(unsqueeze_48, full_int_array_0) + del unsqueeze_48 + + # pd_op.add: (8x4x24x49x49xf32) <- (8x4x24x49x49xf32, 1x4x1x49x49xf32) + add_51 = paddle._C_ops.add(reshape_68, unsqueeze_11) + + # pd_op.reshape: (32x24x49x49xf32) <- (8x4x24x49x49xf32, 4xi64) + reshape_259 = paddle._C_ops.reshape(add_51, full_int_array_66) + + # pd_op.softmax: (32x24x49x49xf32) <- (32x24x49x49xf32) + softmax_7 = paddle._C_ops.softmax(reshape_259, -1) + del reshape_259 + + # pd_op.matmul: (32x24x49x32xf32) <- (32x24x49x49xf32, 32x24x49x32xf32) + matmul_131 = paddle._C_ops.matmul(softmax_7, slice_7, False, False) + + # pd_op.transpose: (32x49x24x32xf32) <- (32x24x49x32xf32) + transpose_47 = paddle._C_ops.transpose(matmul_131, [0, 2, 1, 3]) + del matmul_131 + + # pd_op.reshape: (32x49x768xf32) <- (32x49x24x32xf32, 3xi64) + reshape_69 = paddle._C_ops.reshape(transpose_47, full_int_array_60) + + # pd_op.matmul: (32x49x768xf32) <- (32x49x768xf32, 768x768xf32) + matmul_39 = paddle._C_ops.matmul(reshape_69, parameter_206, False, False) + del parameter_206 + + # pd_op.add: (32x49x768xf32) <- (32x49x768xf32, 768xf32) + add_52 = paddle._C_ops.add(matmul_39, parameter_205) + del parameter_205 + + # pd_op.reshape: (32x7x7x768xf32) <- (32x49x768xf32, 4xi64) + reshape_70 = paddle._C_ops.reshape(add_52, full_int_array_57) + + # pd_op.reshape: (8x2x2x7x7x768xf32) <- (32x7x7x768xf32, 6xi64) + reshape_260 = paddle._C_ops.reshape(reshape_70, full_int_array_61) + + # pd_op.transpose: (8x2x7x2x7x768xf32) <- (8x2x2x7x7x768xf32) + transpose_48 = paddle._C_ops.transpose(reshape_260, [0, 1, 3, 2, 4, 5]) + del reshape_260 + + # pd_op.reshape: (8x14x14x768xf32) <- (8x2x7x2x7x768xf32, 4xi64) + reshape_71 = paddle._C_ops.reshape(transpose_48, full_int_array_62) + + # pd_op.roll: (8x14x14x768xf32) <- (8x14x14x768xf32, 2xi64) + roll_7 = paddle._C_ops.roll(reshape_71, full_int_array_5, [1, 2]) + + # pd_op.reshape: (8x196x768xf32) <- (8x14x14x768xf32, 3xi64) + reshape_72 = paddle._C_ops.reshape(roll_7, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_8 = paddle._C_ops.full( + [], + float("0.969565"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_93 = full_8 + + # pd_op.uniform: (8x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_12 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (8x1x1xf32) <- (xf32, 8x1x1xf32) + add_186 = paddle._C_ops.add(full_8, uniform_12) + del uniform_12 + + # pd_op.floor: (8x1x1xf32) <- (8x1x1xf32) + floor_12 = paddle._C_ops.floor(add_186) + del add_186 + + # pd_op.divide: (8x196x768xf32) <- (8x196x768xf32, xf32) + divide_12 = paddle._C_ops.divide(reshape_72, full_8) + + # pd_op.multiply: (8x196x768xf32) <- (8x196x768xf32, 8x1x1xf32) + multiply_12 = paddle._C_ops.multiply(divide_12, floor_12) + + # pd_op.add: (8x196x768xf32) <- (8x196x768xf32, 8x196x768xf32) + add_53 = paddle._C_ops.add(add_48, multiply_12) + + # pd_op.layer_norm: (8x196x768xf32, 8x196xf32, 8x196xf32) <- (8x196x768xf32, 768xf32, 768xf32) + layer_norm_54, layer_norm_55, layer_norm_56 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_53, parameter_204, parameter_203, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_203, parameter_204 + + # pd_op.matmul: (8x196x3072xf32) <- (8x196x768xf32, 768x3072xf32) + matmul_40 = paddle._C_ops.matmul(layer_norm_54, parameter_202, False, False) + del parameter_202 + + # pd_op.add: (8x196x3072xf32) <- (8x196x3072xf32, 3072xf32) + add_54 = paddle._C_ops.add(matmul_40, parameter_201) + del parameter_201 + + # pd_op.gelu: (8x196x3072xf32) <- (8x196x3072xf32) + gelu_7 = paddle._C_ops.gelu(add_54, False) + + # pd_op.matmul: (8x196x768xf32) <- (8x196x3072xf32, 3072x768xf32) + matmul_41 = paddle._C_ops.matmul(gelu_7, parameter_200, False, False) + del parameter_200 + + # pd_op.add: (8x196x768xf32) <- (8x196x768xf32, 768xf32) + add_55 = paddle._C_ops.add(matmul_41, parameter_199) + del parameter_199 + + # pd_op.uniform: (8x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_13 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (8x1x1xf32) <- (xf32, 8x1x1xf32) + add_187 = paddle._C_ops.add(full_8, uniform_13) + del uniform_13 + + # pd_op.floor: (8x1x1xf32) <- (8x1x1xf32) + floor_13 = paddle._C_ops.floor(add_187) + del add_187 + + # pd_op.divide: (8x196x768xf32) <- (8x196x768xf32, xf32) + divide_13 = paddle._C_ops.divide(add_55, full_8) + + # pd_op.multiply: (8x196x768xf32) <- (8x196x768xf32, 8x1x1xf32) + multiply_13 = paddle._C_ops.multiply(divide_13, floor_13) + + # pd_op.add: (8x196x768xf32) <- (8x196x768xf32, 8x196x768xf32) + add_56 = paddle._C_ops.add(add_53, multiply_13) + + # pd_op.layer_norm: (8x196x768xf32, 8x196xf32, 8x196xf32) <- (8x196x768xf32, 768xf32, 768xf32) + layer_norm_57, layer_norm_58, layer_norm_59 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_56, parameter_198, parameter_197, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_197, parameter_198 + + # pd_op.reshape: (8x14x14x768xf32) <- (8x196x768xf32, 4xi64) + reshape_73 = paddle._C_ops.reshape(layer_norm_57, full_int_array_55) + + # pd_op.reshape: (8x2x7x2x7x768xf32) <- (8x14x14x768xf32, 6xi64) + reshape_261 = paddle._C_ops.reshape(reshape_73, full_int_array_56) + + # pd_op.transpose: (8x2x2x7x7x768xf32) <- (8x2x7x2x7x768xf32) + transpose_49 = paddle._C_ops.transpose(reshape_261, [0, 1, 3, 2, 4, 5]) + del reshape_261 + + # pd_op.reshape: (32x7x7x768xf32) <- (8x2x2x7x7x768xf32, 4xi64) + reshape_74 = paddle._C_ops.reshape(transpose_49, full_int_array_57) + + # pd_op.reshape: (32x49x768xf32) <- (32x7x7x768xf32, 3xi64) + reshape_75 = paddle._C_ops.reshape(reshape_74, full_int_array_58) + + # pd_op.matmul: (32x49x2304xf32) <- (32x49x768xf32, 768x2304xf32) + matmul_42 = paddle._C_ops.matmul(reshape_75, parameter_196, False, False) + del parameter_196 + + # pd_op.add: (32x49x2304xf32) <- (32x49x2304xf32, 2304xf32) + add_57 = paddle._C_ops.add(matmul_42, parameter_195) + del parameter_195 + + # pd_op.reshape: (32x49x3x24x32xf32) <- (32x49x2304xf32, 5xi64) + reshape_262 = paddle._C_ops.reshape(add_57, full_int_array_59) + + # pd_op.transpose: (3x32x24x49x32xf32) <- (32x49x3x24x32xf32) + transpose_50 = paddle._C_ops.transpose(reshape_262, [2, 0, 3, 1, 4]) + del reshape_262 + + # pd_op.slice: (32x24x49x32xf32) <- (3x32x24x49x32xf32, 1xi64, 1xi64) + slice_40 = paddle._C_ops.slice( + transpose_50, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (32x24x49x32xf32) <- (3x32x24x49x32xf32, 1xi64, 1xi64) + slice_41 = paddle._C_ops.slice( + transpose_50, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (32x24x49x32xf32) <- (3x32x24x49x32xf32, 1xi64, 1xi64) + slice_8 = paddle._C_ops.slice( + transpose_50, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (32x24x49x32xf32) <- (32x24x49x32xf32, 1xf32) + scale_8 = paddle._C_ops.scale(slice_40, full_0, float("0"), True) + del slice_40 + + # pd_op.transpose: (32x24x32x49xf32) <- (32x24x49x32xf32) + transpose_51 = paddle._C_ops.transpose(slice_41, [0, 1, 3, 2]) + del slice_41 + + # pd_op.matmul: (32x24x49x49xf32) <- (32x24x49x32xf32, 32x24x32x49xf32) + matmul_43 = paddle._C_ops.matmul(scale_8, transpose_51, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_76 = paddle._C_ops.reshape(data_35, full_int_array_15) + del data_35 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_8 = paddle._C_ops.index_select(data_36, reshape_76, 0) + del data_36 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_263 = paddle._C_ops.reshape(index_select_8, full_int_array_16) + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_52 = paddle._C_ops.transpose(reshape_263, [2, 0, 1]) + del reshape_263 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_12 = paddle._C_ops.unsqueeze(transpose_52, full_int_array_0) + + # pd_op.add: (32x24x49x49xf32) <- (32x24x49x49xf32, 1x24x49x49xf32) + add_188 = paddle._C_ops.add(matmul_43, unsqueeze_12) + + # pd_op.softmax: (32x24x49x49xf32) <- (32x24x49x49xf32) + softmax_8 = paddle._C_ops.softmax(add_188, -1) + del add_188 + + # pd_op.matmul: (32x24x49x32xf32) <- (32x24x49x49xf32, 32x24x49x32xf32) + matmul_132 = paddle._C_ops.matmul(softmax_8, slice_8, False, False) + + # pd_op.transpose: (32x49x24x32xf32) <- (32x24x49x32xf32) + transpose_53 = paddle._C_ops.transpose(matmul_132, [0, 2, 1, 3]) + del matmul_132 + + # pd_op.reshape: (32x49x768xf32) <- (32x49x24x32xf32, 3xi64) + reshape_77 = paddle._C_ops.reshape(transpose_53, full_int_array_60) + + # pd_op.matmul: (32x49x768xf32) <- (32x49x768xf32, 768x768xf32) + matmul_44 = paddle._C_ops.matmul(reshape_77, parameter_194, False, False) + del parameter_194 + + # pd_op.add: (32x49x768xf32) <- (32x49x768xf32, 768xf32) + add_58 = paddle._C_ops.add(matmul_44, parameter_193) + del parameter_193 + + # pd_op.reshape: (32x7x7x768xf32) <- (32x49x768xf32, 4xi64) + reshape_78 = paddle._C_ops.reshape(add_58, full_int_array_57) + + # pd_op.reshape: (8x2x2x7x7x768xf32) <- (32x7x7x768xf32, 6xi64) + reshape_264 = paddle._C_ops.reshape(reshape_78, full_int_array_61) + + # pd_op.transpose: (8x2x7x2x7x768xf32) <- (8x2x2x7x7x768xf32) + transpose_54 = paddle._C_ops.transpose(reshape_264, [0, 1, 3, 2, 4, 5]) + del reshape_264 + + # pd_op.reshape: (8x14x14x768xf32) <- (8x2x7x2x7x768xf32, 4xi64) + reshape_79 = paddle._C_ops.reshape(transpose_54, full_int_array_62) + + # pd_op.reshape: (8x196x768xf32) <- (8x14x14x768xf32, 3xi64) + reshape_80 = paddle._C_ops.reshape(reshape_79, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_9 = paddle._C_ops.full( + [], + float("0.965217"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_102 = full_9 + + # pd_op.uniform: (8x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_14 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (8x1x1xf32) <- (xf32, 8x1x1xf32) + add_189 = paddle._C_ops.add(full_9, uniform_14) + del uniform_14 + + # pd_op.floor: (8x1x1xf32) <- (8x1x1xf32) + floor_14 = paddle._C_ops.floor(add_189) + del add_189 + + # pd_op.divide: (8x196x768xf32) <- (8x196x768xf32, xf32) + divide_14 = paddle._C_ops.divide(reshape_80, full_9) + + # pd_op.multiply: (8x196x768xf32) <- (8x196x768xf32, 8x1x1xf32) + multiply_14 = paddle._C_ops.multiply(divide_14, floor_14) + + # pd_op.add: (8x196x768xf32) <- (8x196x768xf32, 8x196x768xf32) + add_59 = paddle._C_ops.add(add_56, multiply_14) + + # pd_op.layer_norm: (8x196x768xf32, 8x196xf32, 8x196xf32) <- (8x196x768xf32, 768xf32, 768xf32) + layer_norm_60, layer_norm_61, layer_norm_62 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_59, parameter_192, parameter_191, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_191, parameter_192 + + # pd_op.matmul: (8x196x3072xf32) <- (8x196x768xf32, 768x3072xf32) + matmul_45 = paddle._C_ops.matmul(layer_norm_60, parameter_190, False, False) + del parameter_190 + + # pd_op.add: (8x196x3072xf32) <- (8x196x3072xf32, 3072xf32) + add_60 = paddle._C_ops.add(matmul_45, parameter_189) + del parameter_189 + + # pd_op.gelu: (8x196x3072xf32) <- (8x196x3072xf32) + gelu_8 = paddle._C_ops.gelu(add_60, False) + + # pd_op.matmul: (8x196x768xf32) <- (8x196x3072xf32, 3072x768xf32) + matmul_46 = paddle._C_ops.matmul(gelu_8, parameter_188, False, False) + del parameter_188 + + # pd_op.add: (8x196x768xf32) <- (8x196x768xf32, 768xf32) + add_61 = paddle._C_ops.add(matmul_46, parameter_187) + del parameter_187 + + # pd_op.uniform: (8x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_15 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (8x1x1xf32) <- (xf32, 8x1x1xf32) + add_190 = paddle._C_ops.add(full_9, uniform_15) + del uniform_15 + + # pd_op.floor: (8x1x1xf32) <- (8x1x1xf32) + floor_15 = paddle._C_ops.floor(add_190) + del add_190 + + # pd_op.divide: (8x196x768xf32) <- (8x196x768xf32, xf32) + divide_15 = paddle._C_ops.divide(add_61, full_9) + + # pd_op.multiply: (8x196x768xf32) <- (8x196x768xf32, 8x1x1xf32) + multiply_15 = paddle._C_ops.multiply(divide_15, floor_15) + + # pd_op.add: (8x196x768xf32) <- (8x196x768xf32, 8x196x768xf32) + add_62 = paddle._C_ops.add(add_59, multiply_15) + + # pd_op.layer_norm: (8x196x768xf32, 8x196xf32, 8x196xf32) <- (8x196x768xf32, 768xf32, 768xf32) + layer_norm_63, layer_norm_64, layer_norm_65 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_62, parameter_186, parameter_185, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_185, parameter_186 + + # pd_op.reshape: (8x14x14x768xf32) <- (8x196x768xf32, 4xi64) + reshape_81 = paddle._C_ops.reshape(layer_norm_63, full_int_array_55) + + # pd_op.roll: (8x14x14x768xf32) <- (8x14x14x768xf32, 2xi64) + roll_8 = paddle._C_ops.roll(reshape_81, full_int_array_4, [1, 2]) + + # pd_op.reshape: (8x2x7x2x7x768xf32) <- (8x14x14x768xf32, 6xi64) + reshape_265 = paddle._C_ops.reshape(roll_8, full_int_array_56) + + # pd_op.transpose: (8x2x2x7x7x768xf32) <- (8x2x7x2x7x768xf32) + transpose_55 = paddle._C_ops.transpose(reshape_265, [0, 1, 3, 2, 4, 5]) + del reshape_265 + + # pd_op.reshape: (32x7x7x768xf32) <- (8x2x2x7x7x768xf32, 4xi64) + reshape_82 = paddle._C_ops.reshape(transpose_55, full_int_array_57) + + # pd_op.reshape: (32x49x768xf32) <- (32x7x7x768xf32, 3xi64) + reshape_83 = paddle._C_ops.reshape(reshape_82, full_int_array_58) + + # pd_op.full: (1x14x14x1xf32) <- () + full_38 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__44 = paddle._C_ops.set_value_( + full_38, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_38 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__45 = paddle._C_ops.set_value_( + set_value__44, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__44 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__46 = paddle._C_ops.set_value_( + set_value__45, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__45 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__47 = paddle._C_ops.set_value_( + set_value__46, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__46 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__48 = paddle._C_ops.set_value_( + set_value__47, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__47 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__49 = paddle._C_ops.set_value_( + set_value__48, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__48 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__50 = paddle._C_ops.set_value_( + set_value__49, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__49 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__51 = paddle._C_ops.set_value_( + set_value__50, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__50 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__4 = paddle._C_ops.set_value_( + set_value__51, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__51 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_266 = paddle._C_ops.reshape(set_value__4, full_int_array_64) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_150 = paddle._C_ops.transpose(reshape_266, [0, 1, 3, 2, 4, 5]) + del reshape_266 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_267 = paddle._C_ops.reshape(transpose_150, full_int_array_36) + del transpose_150 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_268 = paddle._C_ops.reshape(reshape_267, full_int_array_37) + del reshape_267 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_49 = paddle._C_ops.unsqueeze(reshape_268, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_50 = paddle._C_ops.unsqueeze(reshape_268, full_int_array_2) + del reshape_268 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_4 = paddle._C_ops.subtract(unsqueeze_49, unsqueeze_50) + del unsqueeze_49, unsqueeze_50 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_4 = paddle._C_ops.not_equal(subtract_4, full_26) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_8 = paddle._C_ops.where(not_equal_4, full_35, subtract_4) + del not_equal_4, subtract_4 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_4 = paddle._C_ops.equal(where_8, full_26) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_9 = paddle._C_ops.where(equal_4, full_36, where_8) + del equal_4, where_8 + + # pd_op.matmul: (32x49x2304xf32) <- (32x49x768xf32, 768x2304xf32) + matmul_47 = paddle._C_ops.matmul(reshape_83, parameter_184, False, False) + del parameter_184 + + # pd_op.add: (32x49x2304xf32) <- (32x49x2304xf32, 2304xf32) + add_63 = paddle._C_ops.add(matmul_47, parameter_183) + del parameter_183 + + # pd_op.reshape: (32x49x3x24x32xf32) <- (32x49x2304xf32, 5xi64) + reshape_269 = paddle._C_ops.reshape(add_63, full_int_array_59) + + # pd_op.transpose: (3x32x24x49x32xf32) <- (32x49x3x24x32xf32) + transpose_56 = paddle._C_ops.transpose(reshape_269, [2, 0, 3, 1, 4]) + del reshape_269 + + # pd_op.slice: (32x24x49x32xf32) <- (3x32x24x49x32xf32, 1xi64, 1xi64) + slice_42 = paddle._C_ops.slice( + transpose_56, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (32x24x49x32xf32) <- (3x32x24x49x32xf32, 1xi64, 1xi64) + slice_43 = paddle._C_ops.slice( + transpose_56, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (32x24x49x32xf32) <- (3x32x24x49x32xf32, 1xi64, 1xi64) + slice_9 = paddle._C_ops.slice( + transpose_56, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (32x24x49x32xf32) <- (32x24x49x32xf32, 1xf32) + scale_9 = paddle._C_ops.scale(slice_42, full_0, float("0"), True) + del slice_42 + + # pd_op.transpose: (32x24x32x49xf32) <- (32x24x49x32xf32) + transpose_57 = paddle._C_ops.transpose(slice_43, [0, 1, 3, 2]) + del slice_43 + + # pd_op.matmul: (32x24x49x49xf32) <- (32x24x49x32xf32, 32x24x32x49xf32) + matmul_48 = paddle._C_ops.matmul(scale_9, transpose_57, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_84 = paddle._C_ops.reshape(data_37, full_int_array_15) + del data_37 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_9 = paddle._C_ops.index_select(data_38, reshape_84, 0) + del data_38 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_270 = paddle._C_ops.reshape(index_select_9, full_int_array_16) + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_58 = paddle._C_ops.transpose(reshape_270, [2, 0, 1]) + del reshape_270 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_13 = paddle._C_ops.unsqueeze(transpose_58, full_int_array_0) + + # pd_op.add: (32x24x49x49xf32) <- (32x24x49x49xf32, 1x24x49x49xf32) + add_64 = paddle._C_ops.add(matmul_48, unsqueeze_13) + + # pd_op.reshape: (8x4x24x49x49xf32) <- (32x24x49x49xf32, 5xi64) + reshape_85 = paddle._C_ops.reshape(add_64, full_int_array_65) + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_51 = paddle._C_ops.unsqueeze(where_9, full_int_array_1) + del where_9 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_14 = paddle._C_ops.unsqueeze(unsqueeze_51, full_int_array_0) + del unsqueeze_51 + + # pd_op.add: (8x4x24x49x49xf32) <- (8x4x24x49x49xf32, 1x4x1x49x49xf32) + add_65 = paddle._C_ops.add(reshape_85, unsqueeze_14) + + # pd_op.reshape: (32x24x49x49xf32) <- (8x4x24x49x49xf32, 4xi64) + reshape_271 = paddle._C_ops.reshape(add_65, full_int_array_66) + + # pd_op.softmax: (32x24x49x49xf32) <- (32x24x49x49xf32) + softmax_9 = paddle._C_ops.softmax(reshape_271, -1) + del reshape_271 + + # pd_op.matmul: (32x24x49x32xf32) <- (32x24x49x49xf32, 32x24x49x32xf32) + matmul_133 = paddle._C_ops.matmul(softmax_9, slice_9, False, False) + + # pd_op.transpose: (32x49x24x32xf32) <- (32x24x49x32xf32) + transpose_59 = paddle._C_ops.transpose(matmul_133, [0, 2, 1, 3]) + del matmul_133 + + # pd_op.reshape: (32x49x768xf32) <- (32x49x24x32xf32, 3xi64) + reshape_86 = paddle._C_ops.reshape(transpose_59, full_int_array_60) + + # pd_op.matmul: (32x49x768xf32) <- (32x49x768xf32, 768x768xf32) + matmul_49 = paddle._C_ops.matmul(reshape_86, parameter_182, False, False) + del parameter_182 + + # pd_op.add: (32x49x768xf32) <- (32x49x768xf32, 768xf32) + add_66 = paddle._C_ops.add(matmul_49, parameter_181) + del parameter_181 + + # pd_op.reshape: (32x7x7x768xf32) <- (32x49x768xf32, 4xi64) + reshape_87 = paddle._C_ops.reshape(add_66, full_int_array_57) + + # pd_op.reshape: (8x2x2x7x7x768xf32) <- (32x7x7x768xf32, 6xi64) + reshape_272 = paddle._C_ops.reshape(reshape_87, full_int_array_61) + + # pd_op.transpose: (8x2x7x2x7x768xf32) <- (8x2x2x7x7x768xf32) + transpose_60 = paddle._C_ops.transpose(reshape_272, [0, 1, 3, 2, 4, 5]) + del reshape_272 + + # pd_op.reshape: (8x14x14x768xf32) <- (8x2x7x2x7x768xf32, 4xi64) + reshape_88 = paddle._C_ops.reshape(transpose_60, full_int_array_62) + + # pd_op.roll: (8x14x14x768xf32) <- (8x14x14x768xf32, 2xi64) + roll_9 = paddle._C_ops.roll(reshape_88, full_int_array_5, [1, 2]) + + # pd_op.reshape: (8x196x768xf32) <- (8x14x14x768xf32, 3xi64) + reshape_89 = paddle._C_ops.reshape(roll_9, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_10 = paddle._C_ops.full( + [], + float("0.96087"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_113 = full_10 + + # pd_op.uniform: (8x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_16 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (8x1x1xf32) <- (xf32, 8x1x1xf32) + add_191 = paddle._C_ops.add(full_10, uniform_16) + del uniform_16 + + # pd_op.floor: (8x1x1xf32) <- (8x1x1xf32) + floor_16 = paddle._C_ops.floor(add_191) + del add_191 + + # pd_op.divide: (8x196x768xf32) <- (8x196x768xf32, xf32) + divide_16 = paddle._C_ops.divide(reshape_89, full_10) + + # pd_op.multiply: (8x196x768xf32) <- (8x196x768xf32, 8x1x1xf32) + multiply_16 = paddle._C_ops.multiply(divide_16, floor_16) + + # pd_op.add: (8x196x768xf32) <- (8x196x768xf32, 8x196x768xf32) + add_67 = paddle._C_ops.add(add_62, multiply_16) + + # pd_op.layer_norm: (8x196x768xf32, 8x196xf32, 8x196xf32) <- (8x196x768xf32, 768xf32, 768xf32) + layer_norm_66, layer_norm_67, layer_norm_68 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_67, parameter_180, parameter_179, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_179, parameter_180 + + # pd_op.matmul: (8x196x3072xf32) <- (8x196x768xf32, 768x3072xf32) + matmul_50 = paddle._C_ops.matmul(layer_norm_66, parameter_178, False, False) + del parameter_178 + + # pd_op.add: (8x196x3072xf32) <- (8x196x3072xf32, 3072xf32) + add_68 = paddle._C_ops.add(matmul_50, parameter_177) + del parameter_177 + + # pd_op.gelu: (8x196x3072xf32) <- (8x196x3072xf32) + gelu_9 = paddle._C_ops.gelu(add_68, False) + + # pd_op.matmul: (8x196x768xf32) <- (8x196x3072xf32, 3072x768xf32) + matmul_51 = paddle._C_ops.matmul(gelu_9, parameter_176, False, False) + del parameter_176 + + # pd_op.add: (8x196x768xf32) <- (8x196x768xf32, 768xf32) + add_69 = paddle._C_ops.add(matmul_51, parameter_175) + del parameter_175 + + # pd_op.uniform: (8x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_17 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (8x1x1xf32) <- (xf32, 8x1x1xf32) + add_192 = paddle._C_ops.add(full_10, uniform_17) + del uniform_17 + + # pd_op.floor: (8x1x1xf32) <- (8x1x1xf32) + floor_17 = paddle._C_ops.floor(add_192) + del add_192 + + # pd_op.divide: (8x196x768xf32) <- (8x196x768xf32, xf32) + divide_17 = paddle._C_ops.divide(add_69, full_10) + + # pd_op.multiply: (8x196x768xf32) <- (8x196x768xf32, 8x1x1xf32) + multiply_17 = paddle._C_ops.multiply(divide_17, floor_17) + + # pd_op.add: (8x196x768xf32) <- (8x196x768xf32, 8x196x768xf32) + add_70 = paddle._C_ops.add(add_67, multiply_17) + + # pd_op.layer_norm: (8x196x768xf32, 8x196xf32, 8x196xf32) <- (8x196x768xf32, 768xf32, 768xf32) + layer_norm_69, layer_norm_70, layer_norm_71 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_70, parameter_174, parameter_173, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_173, parameter_174 + + # pd_op.reshape: (8x14x14x768xf32) <- (8x196x768xf32, 4xi64) + reshape_90 = paddle._C_ops.reshape(layer_norm_69, full_int_array_55) + + # pd_op.reshape: (8x2x7x2x7x768xf32) <- (8x14x14x768xf32, 6xi64) + reshape_273 = paddle._C_ops.reshape(reshape_90, full_int_array_56) + + # pd_op.transpose: (8x2x2x7x7x768xf32) <- (8x2x7x2x7x768xf32) + transpose_61 = paddle._C_ops.transpose(reshape_273, [0, 1, 3, 2, 4, 5]) + del reshape_273 + + # pd_op.reshape: (32x7x7x768xf32) <- (8x2x2x7x7x768xf32, 4xi64) + reshape_91 = paddle._C_ops.reshape(transpose_61, full_int_array_57) + + # pd_op.reshape: (32x49x768xf32) <- (32x7x7x768xf32, 3xi64) + reshape_92 = paddle._C_ops.reshape(reshape_91, full_int_array_58) + + # pd_op.matmul: (32x49x2304xf32) <- (32x49x768xf32, 768x2304xf32) + matmul_52 = paddle._C_ops.matmul(reshape_92, parameter_172, False, False) + del parameter_172 + + # pd_op.add: (32x49x2304xf32) <- (32x49x2304xf32, 2304xf32) + add_71 = paddle._C_ops.add(matmul_52, parameter_171) + del parameter_171 + + # pd_op.reshape: (32x49x3x24x32xf32) <- (32x49x2304xf32, 5xi64) + reshape_274 = paddle._C_ops.reshape(add_71, full_int_array_59) + + # pd_op.transpose: (3x32x24x49x32xf32) <- (32x49x3x24x32xf32) + transpose_62 = paddle._C_ops.transpose(reshape_274, [2, 0, 3, 1, 4]) + del reshape_274 + + # pd_op.slice: (32x24x49x32xf32) <- (3x32x24x49x32xf32, 1xi64, 1xi64) + slice_44 = paddle._C_ops.slice( + transpose_62, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (32x24x49x32xf32) <- (3x32x24x49x32xf32, 1xi64, 1xi64) + slice_45 = paddle._C_ops.slice( + transpose_62, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (32x24x49x32xf32) <- (3x32x24x49x32xf32, 1xi64, 1xi64) + slice_10 = paddle._C_ops.slice( + transpose_62, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (32x24x49x32xf32) <- (32x24x49x32xf32, 1xf32) + scale_10 = paddle._C_ops.scale(slice_44, full_0, float("0"), True) + del slice_44 + + # pd_op.transpose: (32x24x32x49xf32) <- (32x24x49x32xf32) + transpose_63 = paddle._C_ops.transpose(slice_45, [0, 1, 3, 2]) + del slice_45 + + # pd_op.matmul: (32x24x49x49xf32) <- (32x24x49x32xf32, 32x24x32x49xf32) + matmul_53 = paddle._C_ops.matmul(scale_10, transpose_63, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_93 = paddle._C_ops.reshape(data_39, full_int_array_15) + del data_39 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_10 = paddle._C_ops.index_select(data_40, reshape_93, 0) + del data_40 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_275 = paddle._C_ops.reshape(index_select_10, full_int_array_16) + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_64 = paddle._C_ops.transpose(reshape_275, [2, 0, 1]) + del reshape_275 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_15 = paddle._C_ops.unsqueeze(transpose_64, full_int_array_0) + + # pd_op.add: (32x24x49x49xf32) <- (32x24x49x49xf32, 1x24x49x49xf32) + add_193 = paddle._C_ops.add(matmul_53, unsqueeze_15) + + # pd_op.softmax: (32x24x49x49xf32) <- (32x24x49x49xf32) + softmax_10 = paddle._C_ops.softmax(add_193, -1) + del add_193 + + # pd_op.matmul: (32x24x49x32xf32) <- (32x24x49x49xf32, 32x24x49x32xf32) + matmul_134 = paddle._C_ops.matmul(softmax_10, slice_10, False, False) + + # pd_op.transpose: (32x49x24x32xf32) <- (32x24x49x32xf32) + transpose_65 = paddle._C_ops.transpose(matmul_134, [0, 2, 1, 3]) + del matmul_134 + + # pd_op.reshape: (32x49x768xf32) <- (32x49x24x32xf32, 3xi64) + reshape_94 = paddle._C_ops.reshape(transpose_65, full_int_array_60) + + # pd_op.matmul: (32x49x768xf32) <- (32x49x768xf32, 768x768xf32) + matmul_54 = paddle._C_ops.matmul(reshape_94, parameter_170, False, False) + del parameter_170 + + # pd_op.add: (32x49x768xf32) <- (32x49x768xf32, 768xf32) + add_72 = paddle._C_ops.add(matmul_54, parameter_169) + del parameter_169 + + # pd_op.reshape: (32x7x7x768xf32) <- (32x49x768xf32, 4xi64) + reshape_95 = paddle._C_ops.reshape(add_72, full_int_array_57) + + # pd_op.reshape: (8x2x2x7x7x768xf32) <- (32x7x7x768xf32, 6xi64) + reshape_276 = paddle._C_ops.reshape(reshape_95, full_int_array_61) + + # pd_op.transpose: (8x2x7x2x7x768xf32) <- (8x2x2x7x7x768xf32) + transpose_66 = paddle._C_ops.transpose(reshape_276, [0, 1, 3, 2, 4, 5]) + del reshape_276 + + # pd_op.reshape: (8x14x14x768xf32) <- (8x2x7x2x7x768xf32, 4xi64) + reshape_96 = paddle._C_ops.reshape(transpose_66, full_int_array_62) + + # pd_op.reshape: (8x196x768xf32) <- (8x14x14x768xf32, 3xi64) + reshape_97 = paddle._C_ops.reshape(reshape_96, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_11 = paddle._C_ops.full( + [], + float("0.956522"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_122 = full_11 + + # pd_op.uniform: (8x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_18 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (8x1x1xf32) <- (xf32, 8x1x1xf32) + add_194 = paddle._C_ops.add(full_11, uniform_18) + del uniform_18 + + # pd_op.floor: (8x1x1xf32) <- (8x1x1xf32) + floor_18 = paddle._C_ops.floor(add_194) + del add_194 + + # pd_op.divide: (8x196x768xf32) <- (8x196x768xf32, xf32) + divide_18 = paddle._C_ops.divide(reshape_97, full_11) + + # pd_op.multiply: (8x196x768xf32) <- (8x196x768xf32, 8x1x1xf32) + multiply_18 = paddle._C_ops.multiply(divide_18, floor_18) + + # pd_op.add: (8x196x768xf32) <- (8x196x768xf32, 8x196x768xf32) + add_73 = paddle._C_ops.add(add_70, multiply_18) + + # pd_op.layer_norm: (8x196x768xf32, 8x196xf32, 8x196xf32) <- (8x196x768xf32, 768xf32, 768xf32) + layer_norm_72, layer_norm_73, layer_norm_74 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_73, parameter_168, parameter_167, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_167, parameter_168 + + # pd_op.matmul: (8x196x3072xf32) <- (8x196x768xf32, 768x3072xf32) + matmul_55 = paddle._C_ops.matmul(layer_norm_72, parameter_166, False, False) + del parameter_166 + + # pd_op.add: (8x196x3072xf32) <- (8x196x3072xf32, 3072xf32) + add_74 = paddle._C_ops.add(matmul_55, parameter_165) + del parameter_165 + + # pd_op.gelu: (8x196x3072xf32) <- (8x196x3072xf32) + gelu_10 = paddle._C_ops.gelu(add_74, False) + + # pd_op.matmul: (8x196x768xf32) <- (8x196x3072xf32, 3072x768xf32) + matmul_56 = paddle._C_ops.matmul(gelu_10, parameter_164, False, False) + del parameter_164 + + # pd_op.add: (8x196x768xf32) <- (8x196x768xf32, 768xf32) + add_75 = paddle._C_ops.add(matmul_56, parameter_163) + del parameter_163 + + # pd_op.uniform: (8x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_19 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (8x1x1xf32) <- (xf32, 8x1x1xf32) + add_195 = paddle._C_ops.add(full_11, uniform_19) + del uniform_19 + + # pd_op.floor: (8x1x1xf32) <- (8x1x1xf32) + floor_19 = paddle._C_ops.floor(add_195) + del add_195 + + # pd_op.divide: (8x196x768xf32) <- (8x196x768xf32, xf32) + divide_19 = paddle._C_ops.divide(add_75, full_11) + + # pd_op.multiply: (8x196x768xf32) <- (8x196x768xf32, 8x1x1xf32) + multiply_19 = paddle._C_ops.multiply(divide_19, floor_19) + + # pd_op.add: (8x196x768xf32) <- (8x196x768xf32, 8x196x768xf32) + add_76 = paddle._C_ops.add(add_73, multiply_19) + + # pd_op.layer_norm: (8x196x768xf32, 8x196xf32, 8x196xf32) <- (8x196x768xf32, 768xf32, 768xf32) + layer_norm_75, layer_norm_76, layer_norm_77 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_76, parameter_162, parameter_161, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_161, parameter_162 + + # pd_op.reshape: (8x14x14x768xf32) <- (8x196x768xf32, 4xi64) + reshape_98 = paddle._C_ops.reshape(layer_norm_75, full_int_array_55) + + # pd_op.roll: (8x14x14x768xf32) <- (8x14x14x768xf32, 2xi64) + roll_10 = paddle._C_ops.roll(reshape_98, full_int_array_4, [1, 2]) + + # pd_op.reshape: (8x2x7x2x7x768xf32) <- (8x14x14x768xf32, 6xi64) + reshape_277 = paddle._C_ops.reshape(roll_10, full_int_array_56) + + # pd_op.transpose: (8x2x2x7x7x768xf32) <- (8x2x7x2x7x768xf32) + transpose_67 = paddle._C_ops.transpose(reshape_277, [0, 1, 3, 2, 4, 5]) + del reshape_277 + + # pd_op.reshape: (32x7x7x768xf32) <- (8x2x2x7x7x768xf32, 4xi64) + reshape_99 = paddle._C_ops.reshape(transpose_67, full_int_array_57) + + # pd_op.reshape: (32x49x768xf32) <- (32x7x7x768xf32, 3xi64) + reshape_100 = paddle._C_ops.reshape(reshape_99, full_int_array_58) + + # pd_op.full: (1x14x14x1xf32) <- () + full_39 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__52 = paddle._C_ops.set_value_( + full_39, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_39 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__53 = paddle._C_ops.set_value_( + set_value__52, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__52 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__54 = paddle._C_ops.set_value_( + set_value__53, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__53 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__55 = paddle._C_ops.set_value_( + set_value__54, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__54 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__56 = paddle._C_ops.set_value_( + set_value__55, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__55 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__57 = paddle._C_ops.set_value_( + set_value__56, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__56 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__58 = paddle._C_ops.set_value_( + set_value__57, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__57 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__59 = paddle._C_ops.set_value_( + set_value__58, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__58 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__5 = paddle._C_ops.set_value_( + set_value__59, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__59 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_278 = paddle._C_ops.reshape(set_value__5, full_int_array_64) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_151 = paddle._C_ops.transpose(reshape_278, [0, 1, 3, 2, 4, 5]) + del reshape_278 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_279 = paddle._C_ops.reshape(transpose_151, full_int_array_36) + del transpose_151 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_280 = paddle._C_ops.reshape(reshape_279, full_int_array_37) + del reshape_279 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_52 = paddle._C_ops.unsqueeze(reshape_280, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_53 = paddle._C_ops.unsqueeze(reshape_280, full_int_array_2) + del reshape_280 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_5 = paddle._C_ops.subtract(unsqueeze_52, unsqueeze_53) + del unsqueeze_52, unsqueeze_53 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_5 = paddle._C_ops.not_equal(subtract_5, full_26) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_10 = paddle._C_ops.where(not_equal_5, full_35, subtract_5) + del not_equal_5, subtract_5 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_5 = paddle._C_ops.equal(where_10, full_26) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_11 = paddle._C_ops.where(equal_5, full_36, where_10) + del equal_5, where_10 + + # pd_op.matmul: (32x49x2304xf32) <- (32x49x768xf32, 768x2304xf32) + matmul_57 = paddle._C_ops.matmul(reshape_100, parameter_160, False, False) + del parameter_160 + + # pd_op.add: (32x49x2304xf32) <- (32x49x2304xf32, 2304xf32) + add_77 = paddle._C_ops.add(matmul_57, parameter_159) + del parameter_159 + + # pd_op.reshape: (32x49x3x24x32xf32) <- (32x49x2304xf32, 5xi64) + reshape_281 = paddle._C_ops.reshape(add_77, full_int_array_59) + + # pd_op.transpose: (3x32x24x49x32xf32) <- (32x49x3x24x32xf32) + transpose_68 = paddle._C_ops.transpose(reshape_281, [2, 0, 3, 1, 4]) + del reshape_281 + + # pd_op.slice: (32x24x49x32xf32) <- (3x32x24x49x32xf32, 1xi64, 1xi64) + slice_46 = paddle._C_ops.slice( + transpose_68, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (32x24x49x32xf32) <- (3x32x24x49x32xf32, 1xi64, 1xi64) + slice_47 = paddle._C_ops.slice( + transpose_68, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (32x24x49x32xf32) <- (3x32x24x49x32xf32, 1xi64, 1xi64) + slice_11 = paddle._C_ops.slice( + transpose_68, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (32x24x49x32xf32) <- (32x24x49x32xf32, 1xf32) + scale_11 = paddle._C_ops.scale(slice_46, full_0, float("0"), True) + del slice_46 + + # pd_op.transpose: (32x24x32x49xf32) <- (32x24x49x32xf32) + transpose_69 = paddle._C_ops.transpose(slice_47, [0, 1, 3, 2]) + del slice_47 + + # pd_op.matmul: (32x24x49x49xf32) <- (32x24x49x32xf32, 32x24x32x49xf32) + matmul_58 = paddle._C_ops.matmul(scale_11, transpose_69, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_101 = paddle._C_ops.reshape(data_41, full_int_array_15) + del data_41 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_11 = paddle._C_ops.index_select(data_42, reshape_101, 0) + del data_42 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_282 = paddle._C_ops.reshape(index_select_11, full_int_array_16) + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_70 = paddle._C_ops.transpose(reshape_282, [2, 0, 1]) + del reshape_282 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_16 = paddle._C_ops.unsqueeze(transpose_70, full_int_array_0) + + # pd_op.add: (32x24x49x49xf32) <- (32x24x49x49xf32, 1x24x49x49xf32) + add_78 = paddle._C_ops.add(matmul_58, unsqueeze_16) + + # pd_op.reshape: (8x4x24x49x49xf32) <- (32x24x49x49xf32, 5xi64) + reshape_102 = paddle._C_ops.reshape(add_78, full_int_array_65) + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_54 = paddle._C_ops.unsqueeze(where_11, full_int_array_1) + del where_11 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_17 = paddle._C_ops.unsqueeze(unsqueeze_54, full_int_array_0) + del unsqueeze_54 + + # pd_op.add: (8x4x24x49x49xf32) <- (8x4x24x49x49xf32, 1x4x1x49x49xf32) + add_79 = paddle._C_ops.add(reshape_102, unsqueeze_17) + + # pd_op.reshape: (32x24x49x49xf32) <- (8x4x24x49x49xf32, 4xi64) + reshape_283 = paddle._C_ops.reshape(add_79, full_int_array_66) + + # pd_op.softmax: (32x24x49x49xf32) <- (32x24x49x49xf32) + softmax_11 = paddle._C_ops.softmax(reshape_283, -1) + del reshape_283 + + # pd_op.matmul: (32x24x49x32xf32) <- (32x24x49x49xf32, 32x24x49x32xf32) + matmul_135 = paddle._C_ops.matmul(softmax_11, slice_11, False, False) + + # pd_op.transpose: (32x49x24x32xf32) <- (32x24x49x32xf32) + transpose_71 = paddle._C_ops.transpose(matmul_135, [0, 2, 1, 3]) + del matmul_135 + + # pd_op.reshape: (32x49x768xf32) <- (32x49x24x32xf32, 3xi64) + reshape_103 = paddle._C_ops.reshape(transpose_71, full_int_array_60) + + # pd_op.matmul: (32x49x768xf32) <- (32x49x768xf32, 768x768xf32) + matmul_59 = paddle._C_ops.matmul(reshape_103, parameter_158, False, False) + del parameter_158 + + # pd_op.add: (32x49x768xf32) <- (32x49x768xf32, 768xf32) + add_80 = paddle._C_ops.add(matmul_59, parameter_157) + del parameter_157 + + # pd_op.reshape: (32x7x7x768xf32) <- (32x49x768xf32, 4xi64) + reshape_104 = paddle._C_ops.reshape(add_80, full_int_array_57) + + # pd_op.reshape: (8x2x2x7x7x768xf32) <- (32x7x7x768xf32, 6xi64) + reshape_284 = paddle._C_ops.reshape(reshape_104, full_int_array_61) + + # pd_op.transpose: (8x2x7x2x7x768xf32) <- (8x2x2x7x7x768xf32) + transpose_72 = paddle._C_ops.transpose(reshape_284, [0, 1, 3, 2, 4, 5]) + del reshape_284 + + # pd_op.reshape: (8x14x14x768xf32) <- (8x2x7x2x7x768xf32, 4xi64) + reshape_105 = paddle._C_ops.reshape(transpose_72, full_int_array_62) + + # pd_op.roll: (8x14x14x768xf32) <- (8x14x14x768xf32, 2xi64) + roll_11 = paddle._C_ops.roll(reshape_105, full_int_array_5, [1, 2]) + + # pd_op.reshape: (8x196x768xf32) <- (8x14x14x768xf32, 3xi64) + reshape_106 = paddle._C_ops.reshape(roll_11, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_12 = paddle._C_ops.full( + [], + float("0.952174"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_133 = full_12 + + # pd_op.uniform: (8x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_20 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (8x1x1xf32) <- (xf32, 8x1x1xf32) + add_196 = paddle._C_ops.add(full_12, uniform_20) + del uniform_20 + + # pd_op.floor: (8x1x1xf32) <- (8x1x1xf32) + floor_20 = paddle._C_ops.floor(add_196) + del add_196 + + # pd_op.divide: (8x196x768xf32) <- (8x196x768xf32, xf32) + divide_20 = paddle._C_ops.divide(reshape_106, full_12) + + # pd_op.multiply: (8x196x768xf32) <- (8x196x768xf32, 8x1x1xf32) + multiply_20 = paddle._C_ops.multiply(divide_20, floor_20) + + # pd_op.add: (8x196x768xf32) <- (8x196x768xf32, 8x196x768xf32) + add_81 = paddle._C_ops.add(add_76, multiply_20) + + # pd_op.layer_norm: (8x196x768xf32, 8x196xf32, 8x196xf32) <- (8x196x768xf32, 768xf32, 768xf32) + layer_norm_78, layer_norm_79, layer_norm_80 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_81, parameter_156, parameter_155, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_155, parameter_156 + + # pd_op.matmul: (8x196x3072xf32) <- (8x196x768xf32, 768x3072xf32) + matmul_60 = paddle._C_ops.matmul(layer_norm_78, parameter_154, False, False) + del parameter_154 + + # pd_op.add: (8x196x3072xf32) <- (8x196x3072xf32, 3072xf32) + add_82 = paddle._C_ops.add(matmul_60, parameter_153) + del parameter_153 + + # pd_op.gelu: (8x196x3072xf32) <- (8x196x3072xf32) + gelu_11 = paddle._C_ops.gelu(add_82, False) + + # pd_op.matmul: (8x196x768xf32) <- (8x196x3072xf32, 3072x768xf32) + matmul_61 = paddle._C_ops.matmul(gelu_11, parameter_152, False, False) + del parameter_152 + + # pd_op.add: (8x196x768xf32) <- (8x196x768xf32, 768xf32) + add_83 = paddle._C_ops.add(matmul_61, parameter_151) + del parameter_151 + + # pd_op.uniform: (8x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_21 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (8x1x1xf32) <- (xf32, 8x1x1xf32) + add_197 = paddle._C_ops.add(full_12, uniform_21) + del uniform_21 + + # pd_op.floor: (8x1x1xf32) <- (8x1x1xf32) + floor_21 = paddle._C_ops.floor(add_197) + del add_197 + + # pd_op.divide: (8x196x768xf32) <- (8x196x768xf32, xf32) + divide_21 = paddle._C_ops.divide(add_83, full_12) + + # pd_op.multiply: (8x196x768xf32) <- (8x196x768xf32, 8x1x1xf32) + multiply_21 = paddle._C_ops.multiply(divide_21, floor_21) + + # pd_op.add: (8x196x768xf32) <- (8x196x768xf32, 8x196x768xf32) + add_84 = paddle._C_ops.add(add_81, multiply_21) + + # pd_op.layer_norm: (8x196x768xf32, 8x196xf32, 8x196xf32) <- (8x196x768xf32, 768xf32, 768xf32) + layer_norm_81, layer_norm_82, layer_norm_83 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_84, parameter_150, parameter_149, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_149, parameter_150 + + # pd_op.reshape: (8x14x14x768xf32) <- (8x196x768xf32, 4xi64) + reshape_107 = paddle._C_ops.reshape(layer_norm_81, full_int_array_55) + + # pd_op.reshape: (8x2x7x2x7x768xf32) <- (8x14x14x768xf32, 6xi64) + reshape_285 = paddle._C_ops.reshape(reshape_107, full_int_array_56) + + # pd_op.transpose: (8x2x2x7x7x768xf32) <- (8x2x7x2x7x768xf32) + transpose_73 = paddle._C_ops.transpose(reshape_285, [0, 1, 3, 2, 4, 5]) + del reshape_285 + + # pd_op.reshape: (32x7x7x768xf32) <- (8x2x2x7x7x768xf32, 4xi64) + reshape_108 = paddle._C_ops.reshape(transpose_73, full_int_array_57) + + # pd_op.reshape: (32x49x768xf32) <- (32x7x7x768xf32, 3xi64) + reshape_109 = paddle._C_ops.reshape(reshape_108, full_int_array_58) + + # pd_op.matmul: (32x49x2304xf32) <- (32x49x768xf32, 768x2304xf32) + matmul_62 = paddle._C_ops.matmul(reshape_109, parameter_148, False, False) + del parameter_148 + + # pd_op.add: (32x49x2304xf32) <- (32x49x2304xf32, 2304xf32) + add_85 = paddle._C_ops.add(matmul_62, parameter_147) + del parameter_147 + + # pd_op.reshape: (32x49x3x24x32xf32) <- (32x49x2304xf32, 5xi64) + reshape_286 = paddle._C_ops.reshape(add_85, full_int_array_59) + + # pd_op.transpose: (3x32x24x49x32xf32) <- (32x49x3x24x32xf32) + transpose_74 = paddle._C_ops.transpose(reshape_286, [2, 0, 3, 1, 4]) + del reshape_286 + + # pd_op.slice: (32x24x49x32xf32) <- (3x32x24x49x32xf32, 1xi64, 1xi64) + slice_48 = paddle._C_ops.slice( + transpose_74, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (32x24x49x32xf32) <- (3x32x24x49x32xf32, 1xi64, 1xi64) + slice_49 = paddle._C_ops.slice( + transpose_74, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (32x24x49x32xf32) <- (3x32x24x49x32xf32, 1xi64, 1xi64) + slice_12 = paddle._C_ops.slice( + transpose_74, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (32x24x49x32xf32) <- (32x24x49x32xf32, 1xf32) + scale_12 = paddle._C_ops.scale(slice_48, full_0, float("0"), True) + del slice_48 + + # pd_op.transpose: (32x24x32x49xf32) <- (32x24x49x32xf32) + transpose_75 = paddle._C_ops.transpose(slice_49, [0, 1, 3, 2]) + del slice_49 + + # pd_op.matmul: (32x24x49x49xf32) <- (32x24x49x32xf32, 32x24x32x49xf32) + matmul_63 = paddle._C_ops.matmul(scale_12, transpose_75, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_110 = paddle._C_ops.reshape(data_44, full_int_array_15) + del data_44 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_12 = paddle._C_ops.index_select(data_45, reshape_110, 0) + del data_45 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_287 = paddle._C_ops.reshape(index_select_12, full_int_array_16) + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_76 = paddle._C_ops.transpose(reshape_287, [2, 0, 1]) + del reshape_287 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_18 = paddle._C_ops.unsqueeze(transpose_76, full_int_array_0) + + # pd_op.add: (32x24x49x49xf32) <- (32x24x49x49xf32, 1x24x49x49xf32) + add_198 = paddle._C_ops.add(matmul_63, unsqueeze_18) + + # pd_op.softmax: (32x24x49x49xf32) <- (32x24x49x49xf32) + softmax_12 = paddle._C_ops.softmax(add_198, -1) + del add_198 + + # pd_op.matmul: (32x24x49x32xf32) <- (32x24x49x49xf32, 32x24x49x32xf32) + matmul_136 = paddle._C_ops.matmul(softmax_12, slice_12, False, False) + + # pd_op.transpose: (32x49x24x32xf32) <- (32x24x49x32xf32) + transpose_77 = paddle._C_ops.transpose(matmul_136, [0, 2, 1, 3]) + del matmul_136 + + # pd_op.reshape: (32x49x768xf32) <- (32x49x24x32xf32, 3xi64) + reshape_111 = paddle._C_ops.reshape(transpose_77, full_int_array_60) + + # pd_op.matmul: (32x49x768xf32) <- (32x49x768xf32, 768x768xf32) + matmul_64 = paddle._C_ops.matmul(reshape_111, parameter_146, False, False) + del parameter_146 + + # pd_op.add: (32x49x768xf32) <- (32x49x768xf32, 768xf32) + add_86 = paddle._C_ops.add(matmul_64, parameter_145) + del parameter_145 + + # pd_op.reshape: (32x7x7x768xf32) <- (32x49x768xf32, 4xi64) + reshape_112 = paddle._C_ops.reshape(add_86, full_int_array_57) + + # pd_op.reshape: (8x2x2x7x7x768xf32) <- (32x7x7x768xf32, 6xi64) + reshape_288 = paddle._C_ops.reshape(reshape_112, full_int_array_61) + + # pd_op.transpose: (8x2x7x2x7x768xf32) <- (8x2x2x7x7x768xf32) + transpose_78 = paddle._C_ops.transpose(reshape_288, [0, 1, 3, 2, 4, 5]) + del reshape_288 + + # pd_op.reshape: (8x14x14x768xf32) <- (8x2x7x2x7x768xf32, 4xi64) + reshape_113 = paddle._C_ops.reshape(transpose_78, full_int_array_62) + + # pd_op.reshape: (8x196x768xf32) <- (8x14x14x768xf32, 3xi64) + reshape_114 = paddle._C_ops.reshape(reshape_113, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_13 = paddle._C_ops.full( + [], + float("0.947826"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_142 = full_13 + + # pd_op.uniform: (8x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_22 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (8x1x1xf32) <- (xf32, 8x1x1xf32) + add_199 = paddle._C_ops.add(full_13, uniform_22) + del uniform_22 + + # pd_op.floor: (8x1x1xf32) <- (8x1x1xf32) + floor_22 = paddle._C_ops.floor(add_199) + del add_199 + + # pd_op.divide: (8x196x768xf32) <- (8x196x768xf32, xf32) + divide_22 = paddle._C_ops.divide(reshape_114, full_13) + + # pd_op.multiply: (8x196x768xf32) <- (8x196x768xf32, 8x1x1xf32) + multiply_22 = paddle._C_ops.multiply(divide_22, floor_22) + + # pd_op.add: (8x196x768xf32) <- (8x196x768xf32, 8x196x768xf32) + add_87 = paddle._C_ops.add(add_84, multiply_22) + + # pd_op.layer_norm: (8x196x768xf32, 8x196xf32, 8x196xf32) <- (8x196x768xf32, 768xf32, 768xf32) + layer_norm_84, layer_norm_85, layer_norm_86 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_87, parameter_144, parameter_143, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_143, parameter_144 + + # pd_op.matmul: (8x196x3072xf32) <- (8x196x768xf32, 768x3072xf32) + matmul_65 = paddle._C_ops.matmul(layer_norm_84, parameter_142, False, False) + del parameter_142 + + # pd_op.add: (8x196x3072xf32) <- (8x196x3072xf32, 3072xf32) + add_88 = paddle._C_ops.add(matmul_65, parameter_141) + del parameter_141 + + # pd_op.gelu: (8x196x3072xf32) <- (8x196x3072xf32) + gelu_12 = paddle._C_ops.gelu(add_88, False) + + # pd_op.matmul: (8x196x768xf32) <- (8x196x3072xf32, 3072x768xf32) + matmul_66 = paddle._C_ops.matmul(gelu_12, parameter_140, False, False) + del parameter_140 + + # pd_op.add: (8x196x768xf32) <- (8x196x768xf32, 768xf32) + add_89 = paddle._C_ops.add(matmul_66, parameter_139) + del parameter_139 + + # pd_op.uniform: (8x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_23 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (8x1x1xf32) <- (xf32, 8x1x1xf32) + add_200 = paddle._C_ops.add(full_13, uniform_23) + del uniform_23 + + # pd_op.floor: (8x1x1xf32) <- (8x1x1xf32) + floor_23 = paddle._C_ops.floor(add_200) + del add_200 + + # pd_op.divide: (8x196x768xf32) <- (8x196x768xf32, xf32) + divide_23 = paddle._C_ops.divide(add_89, full_13) + + # pd_op.multiply: (8x196x768xf32) <- (8x196x768xf32, 8x1x1xf32) + multiply_23 = paddle._C_ops.multiply(divide_23, floor_23) + + # pd_op.add: (8x196x768xf32) <- (8x196x768xf32, 8x196x768xf32) + add_90 = paddle._C_ops.add(add_87, multiply_23) + + # pd_op.layer_norm: (8x196x768xf32, 8x196xf32, 8x196xf32) <- (8x196x768xf32, 768xf32, 768xf32) + layer_norm_87, layer_norm_88, layer_norm_89 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_90, parameter_138, parameter_137, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_137, parameter_138 + + # pd_op.reshape: (8x14x14x768xf32) <- (8x196x768xf32, 4xi64) + reshape_115 = paddle._C_ops.reshape(layer_norm_87, full_int_array_55) + + # pd_op.roll: (8x14x14x768xf32) <- (8x14x14x768xf32, 2xi64) + roll_12 = paddle._C_ops.roll(reshape_115, full_int_array_4, [1, 2]) + + # pd_op.reshape: (8x2x7x2x7x768xf32) <- (8x14x14x768xf32, 6xi64) + reshape_289 = paddle._C_ops.reshape(roll_12, full_int_array_56) + + # pd_op.transpose: (8x2x2x7x7x768xf32) <- (8x2x7x2x7x768xf32) + transpose_79 = paddle._C_ops.transpose(reshape_289, [0, 1, 3, 2, 4, 5]) + del reshape_289 + + # pd_op.reshape: (32x7x7x768xf32) <- (8x2x2x7x7x768xf32, 4xi64) + reshape_116 = paddle._C_ops.reshape(transpose_79, full_int_array_57) + + # pd_op.reshape: (32x49x768xf32) <- (32x7x7x768xf32, 3xi64) + reshape_117 = paddle._C_ops.reshape(reshape_116, full_int_array_58) + + # pd_op.full: (1x14x14x1xf32) <- () + full_40 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__60 = paddle._C_ops.set_value_( + full_40, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_40 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__61 = paddle._C_ops.set_value_( + set_value__60, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__60 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__62 = paddle._C_ops.set_value_( + set_value__61, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__61 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__63 = paddle._C_ops.set_value_( + set_value__62, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__62 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__64 = paddle._C_ops.set_value_( + set_value__63, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__63 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__65 = paddle._C_ops.set_value_( + set_value__64, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__64 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__66 = paddle._C_ops.set_value_( + set_value__65, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__65 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__67 = paddle._C_ops.set_value_( + set_value__66, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__66 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__6 = paddle._C_ops.set_value_( + set_value__67, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__67 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_290 = paddle._C_ops.reshape(set_value__6, full_int_array_64) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_152 = paddle._C_ops.transpose(reshape_290, [0, 1, 3, 2, 4, 5]) + del reshape_290 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_291 = paddle._C_ops.reshape(transpose_152, full_int_array_36) + del transpose_152 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_292 = paddle._C_ops.reshape(reshape_291, full_int_array_37) + del reshape_291 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_55 = paddle._C_ops.unsqueeze(reshape_292, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_56 = paddle._C_ops.unsqueeze(reshape_292, full_int_array_2) + del reshape_292 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_6 = paddle._C_ops.subtract(unsqueeze_55, unsqueeze_56) + del unsqueeze_55, unsqueeze_56 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_6 = paddle._C_ops.not_equal(subtract_6, full_26) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_12 = paddle._C_ops.where(not_equal_6, full_35, subtract_6) + del not_equal_6, subtract_6 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_6 = paddle._C_ops.equal(where_12, full_26) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_13 = paddle._C_ops.where(equal_6, full_36, where_12) + del equal_6, where_12 + + # pd_op.matmul: (32x49x2304xf32) <- (32x49x768xf32, 768x2304xf32) + matmul_67 = paddle._C_ops.matmul(reshape_117, parameter_136, False, False) + del parameter_136 + + # pd_op.add: (32x49x2304xf32) <- (32x49x2304xf32, 2304xf32) + add_91 = paddle._C_ops.add(matmul_67, parameter_135) + del parameter_135 + + # pd_op.reshape: (32x49x3x24x32xf32) <- (32x49x2304xf32, 5xi64) + reshape_293 = paddle._C_ops.reshape(add_91, full_int_array_59) + + # pd_op.transpose: (3x32x24x49x32xf32) <- (32x49x3x24x32xf32) + transpose_80 = paddle._C_ops.transpose(reshape_293, [2, 0, 3, 1, 4]) + del reshape_293 + + # pd_op.slice: (32x24x49x32xf32) <- (3x32x24x49x32xf32, 1xi64, 1xi64) + slice_50 = paddle._C_ops.slice( + transpose_80, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (32x24x49x32xf32) <- (3x32x24x49x32xf32, 1xi64, 1xi64) + slice_51 = paddle._C_ops.slice( + transpose_80, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (32x24x49x32xf32) <- (3x32x24x49x32xf32, 1xi64, 1xi64) + slice_13 = paddle._C_ops.slice( + transpose_80, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (32x24x49x32xf32) <- (32x24x49x32xf32, 1xf32) + scale_13 = paddle._C_ops.scale(slice_50, full_0, float("0"), True) + del slice_50 + + # pd_op.transpose: (32x24x32x49xf32) <- (32x24x49x32xf32) + transpose_81 = paddle._C_ops.transpose(slice_51, [0, 1, 3, 2]) + del slice_51 + + # pd_op.matmul: (32x24x49x49xf32) <- (32x24x49x32xf32, 32x24x32x49xf32) + matmul_68 = paddle._C_ops.matmul(scale_13, transpose_81, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_118 = paddle._C_ops.reshape(data_47, full_int_array_15) + del data_47 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_13 = paddle._C_ops.index_select(data_48, reshape_118, 0) + del data_48 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_294 = paddle._C_ops.reshape(index_select_13, full_int_array_16) + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_82 = paddle._C_ops.transpose(reshape_294, [2, 0, 1]) + del reshape_294 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_19 = paddle._C_ops.unsqueeze(transpose_82, full_int_array_0) + + # pd_op.add: (32x24x49x49xf32) <- (32x24x49x49xf32, 1x24x49x49xf32) + add_92 = paddle._C_ops.add(matmul_68, unsqueeze_19) + + # pd_op.reshape: (8x4x24x49x49xf32) <- (32x24x49x49xf32, 5xi64) + reshape_119 = paddle._C_ops.reshape(add_92, full_int_array_65) + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_57 = paddle._C_ops.unsqueeze(where_13, full_int_array_1) + del where_13 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_20 = paddle._C_ops.unsqueeze(unsqueeze_57, full_int_array_0) + del unsqueeze_57 + + # pd_op.add: (8x4x24x49x49xf32) <- (8x4x24x49x49xf32, 1x4x1x49x49xf32) + add_93 = paddle._C_ops.add(reshape_119, unsqueeze_20) + + # pd_op.reshape: (32x24x49x49xf32) <- (8x4x24x49x49xf32, 4xi64) + reshape_295 = paddle._C_ops.reshape(add_93, full_int_array_66) + + # pd_op.softmax: (32x24x49x49xf32) <- (32x24x49x49xf32) + softmax_13 = paddle._C_ops.softmax(reshape_295, -1) + del reshape_295 + + # pd_op.matmul: (32x24x49x32xf32) <- (32x24x49x49xf32, 32x24x49x32xf32) + matmul_137 = paddle._C_ops.matmul(softmax_13, slice_13, False, False) + + # pd_op.transpose: (32x49x24x32xf32) <- (32x24x49x32xf32) + transpose_83 = paddle._C_ops.transpose(matmul_137, [0, 2, 1, 3]) + del matmul_137 + + # pd_op.reshape: (32x49x768xf32) <- (32x49x24x32xf32, 3xi64) + reshape_120 = paddle._C_ops.reshape(transpose_83, full_int_array_60) + + # pd_op.matmul: (32x49x768xf32) <- (32x49x768xf32, 768x768xf32) + matmul_69 = paddle._C_ops.matmul(reshape_120, parameter_134, False, False) + del parameter_134 + + # pd_op.add: (32x49x768xf32) <- (32x49x768xf32, 768xf32) + add_94 = paddle._C_ops.add(matmul_69, parameter_133) + del parameter_133 + + # pd_op.reshape: (32x7x7x768xf32) <- (32x49x768xf32, 4xi64) + reshape_121 = paddle._C_ops.reshape(add_94, full_int_array_57) + + # pd_op.reshape: (8x2x2x7x7x768xf32) <- (32x7x7x768xf32, 6xi64) + reshape_296 = paddle._C_ops.reshape(reshape_121, full_int_array_61) + + # pd_op.transpose: (8x2x7x2x7x768xf32) <- (8x2x2x7x7x768xf32) + transpose_84 = paddle._C_ops.transpose(reshape_296, [0, 1, 3, 2, 4, 5]) + del reshape_296 + + # pd_op.reshape: (8x14x14x768xf32) <- (8x2x7x2x7x768xf32, 4xi64) + reshape_122 = paddle._C_ops.reshape(transpose_84, full_int_array_62) + + # pd_op.roll: (8x14x14x768xf32) <- (8x14x14x768xf32, 2xi64) + roll_13 = paddle._C_ops.roll(reshape_122, full_int_array_5, [1, 2]) + + # pd_op.reshape: (8x196x768xf32) <- (8x14x14x768xf32, 3xi64) + reshape_123 = paddle._C_ops.reshape(roll_13, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_14 = paddle._C_ops.full( + [], + float("0.943478"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_153 = full_14 + + # pd_op.uniform: (8x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_24 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (8x1x1xf32) <- (xf32, 8x1x1xf32) + add_201 = paddle._C_ops.add(full_14, uniform_24) + del uniform_24 + + # pd_op.floor: (8x1x1xf32) <- (8x1x1xf32) + floor_24 = paddle._C_ops.floor(add_201) + del add_201 + + # pd_op.divide: (8x196x768xf32) <- (8x196x768xf32, xf32) + divide_24 = paddle._C_ops.divide(reshape_123, full_14) + + # pd_op.multiply: (8x196x768xf32) <- (8x196x768xf32, 8x1x1xf32) + multiply_24 = paddle._C_ops.multiply(divide_24, floor_24) + + # pd_op.add: (8x196x768xf32) <- (8x196x768xf32, 8x196x768xf32) + add_95 = paddle._C_ops.add(add_90, multiply_24) + + # pd_op.layer_norm: (8x196x768xf32, 8x196xf32, 8x196xf32) <- (8x196x768xf32, 768xf32, 768xf32) + layer_norm_90, layer_norm_91, layer_norm_92 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_95, parameter_132, parameter_131, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_131, parameter_132 + + # pd_op.matmul: (8x196x3072xf32) <- (8x196x768xf32, 768x3072xf32) + matmul_70 = paddle._C_ops.matmul(layer_norm_90, parameter_130, False, False) + del parameter_130 + + # pd_op.add: (8x196x3072xf32) <- (8x196x3072xf32, 3072xf32) + add_96 = paddle._C_ops.add(matmul_70, parameter_129) + del parameter_129 + + # pd_op.gelu: (8x196x3072xf32) <- (8x196x3072xf32) + gelu_13 = paddle._C_ops.gelu(add_96, False) + + # pd_op.matmul: (8x196x768xf32) <- (8x196x3072xf32, 3072x768xf32) + matmul_71 = paddle._C_ops.matmul(gelu_13, parameter_128, False, False) + del parameter_128 + + # pd_op.add: (8x196x768xf32) <- (8x196x768xf32, 768xf32) + add_97 = paddle._C_ops.add(matmul_71, parameter_127) + del parameter_127 + + # pd_op.uniform: (8x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_25 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (8x1x1xf32) <- (xf32, 8x1x1xf32) + add_202 = paddle._C_ops.add(full_14, uniform_25) + del uniform_25 + + # pd_op.floor: (8x1x1xf32) <- (8x1x1xf32) + floor_25 = paddle._C_ops.floor(add_202) + del add_202 + + # pd_op.divide: (8x196x768xf32) <- (8x196x768xf32, xf32) + divide_25 = paddle._C_ops.divide(add_97, full_14) + + # pd_op.multiply: (8x196x768xf32) <- (8x196x768xf32, 8x1x1xf32) + multiply_25 = paddle._C_ops.multiply(divide_25, floor_25) + + # pd_op.add: (8x196x768xf32) <- (8x196x768xf32, 8x196x768xf32) + add_98 = paddle._C_ops.add(add_95, multiply_25) + + # pd_op.layer_norm: (8x196x768xf32, 8x196xf32, 8x196xf32) <- (8x196x768xf32, 768xf32, 768xf32) + layer_norm_93, layer_norm_94, layer_norm_95 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_98, parameter_126, parameter_125, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_125, parameter_126 + + # pd_op.reshape: (8x14x14x768xf32) <- (8x196x768xf32, 4xi64) + reshape_124 = paddle._C_ops.reshape(layer_norm_93, full_int_array_55) + + # pd_op.reshape: (8x2x7x2x7x768xf32) <- (8x14x14x768xf32, 6xi64) + reshape_297 = paddle._C_ops.reshape(reshape_124, full_int_array_56) + + # pd_op.transpose: (8x2x2x7x7x768xf32) <- (8x2x7x2x7x768xf32) + transpose_85 = paddle._C_ops.transpose(reshape_297, [0, 1, 3, 2, 4, 5]) + del reshape_297 + + # pd_op.reshape: (32x7x7x768xf32) <- (8x2x2x7x7x768xf32, 4xi64) + reshape_125 = paddle._C_ops.reshape(transpose_85, full_int_array_57) + + # pd_op.reshape: (32x49x768xf32) <- (32x7x7x768xf32, 3xi64) + reshape_126 = paddle._C_ops.reshape(reshape_125, full_int_array_58) + + # pd_op.matmul: (32x49x2304xf32) <- (32x49x768xf32, 768x2304xf32) + matmul_72 = paddle._C_ops.matmul(reshape_126, parameter_124, False, False) + del parameter_124 + + # pd_op.add: (32x49x2304xf32) <- (32x49x2304xf32, 2304xf32) + add_99 = paddle._C_ops.add(matmul_72, parameter_123) + del parameter_123 + + # pd_op.reshape: (32x49x3x24x32xf32) <- (32x49x2304xf32, 5xi64) + reshape_298 = paddle._C_ops.reshape(add_99, full_int_array_59) + + # pd_op.transpose: (3x32x24x49x32xf32) <- (32x49x3x24x32xf32) + transpose_86 = paddle._C_ops.transpose(reshape_298, [2, 0, 3, 1, 4]) + del reshape_298 + + # pd_op.slice: (32x24x49x32xf32) <- (3x32x24x49x32xf32, 1xi64, 1xi64) + slice_52 = paddle._C_ops.slice( + transpose_86, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (32x24x49x32xf32) <- (3x32x24x49x32xf32, 1xi64, 1xi64) + slice_53 = paddle._C_ops.slice( + transpose_86, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (32x24x49x32xf32) <- (3x32x24x49x32xf32, 1xi64, 1xi64) + slice_14 = paddle._C_ops.slice( + transpose_86, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (32x24x49x32xf32) <- (32x24x49x32xf32, 1xf32) + scale_14 = paddle._C_ops.scale(slice_52, full_0, float("0"), True) + del slice_52 + + # pd_op.transpose: (32x24x32x49xf32) <- (32x24x49x32xf32) + transpose_87 = paddle._C_ops.transpose(slice_53, [0, 1, 3, 2]) + del slice_53 + + # pd_op.matmul: (32x24x49x49xf32) <- (32x24x49x32xf32, 32x24x32x49xf32) + matmul_73 = paddle._C_ops.matmul(scale_14, transpose_87, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_127 = paddle._C_ops.reshape(data_1, full_int_array_15) + del data_1 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_14 = paddle._C_ops.index_select(data_2, reshape_127, 0) + del data_2 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_299 = paddle._C_ops.reshape(index_select_14, full_int_array_16) + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_88 = paddle._C_ops.transpose(reshape_299, [2, 0, 1]) + del reshape_299 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_21 = paddle._C_ops.unsqueeze(transpose_88, full_int_array_0) + + # pd_op.add: (32x24x49x49xf32) <- (32x24x49x49xf32, 1x24x49x49xf32) + add_203 = paddle._C_ops.add(matmul_73, unsqueeze_21) + + # pd_op.softmax: (32x24x49x49xf32) <- (32x24x49x49xf32) + softmax_14 = paddle._C_ops.softmax(add_203, -1) + del add_203 + + # pd_op.matmul: (32x24x49x32xf32) <- (32x24x49x49xf32, 32x24x49x32xf32) + matmul_138 = paddle._C_ops.matmul(softmax_14, slice_14, False, False) + + # pd_op.transpose: (32x49x24x32xf32) <- (32x24x49x32xf32) + transpose_89 = paddle._C_ops.transpose(matmul_138, [0, 2, 1, 3]) + del matmul_138 + + # pd_op.reshape: (32x49x768xf32) <- (32x49x24x32xf32, 3xi64) + reshape_128 = paddle._C_ops.reshape(transpose_89, full_int_array_60) + + # pd_op.matmul: (32x49x768xf32) <- (32x49x768xf32, 768x768xf32) + matmul_74 = paddle._C_ops.matmul(reshape_128, parameter_122, False, False) + del parameter_122 + + # pd_op.add: (32x49x768xf32) <- (32x49x768xf32, 768xf32) + add_100 = paddle._C_ops.add(matmul_74, parameter_121) + del parameter_121 + + # pd_op.reshape: (32x7x7x768xf32) <- (32x49x768xf32, 4xi64) + reshape_129 = paddle._C_ops.reshape(add_100, full_int_array_57) + + # pd_op.reshape: (8x2x2x7x7x768xf32) <- (32x7x7x768xf32, 6xi64) + reshape_300 = paddle._C_ops.reshape(reshape_129, full_int_array_61) + + # pd_op.transpose: (8x2x7x2x7x768xf32) <- (8x2x2x7x7x768xf32) + transpose_90 = paddle._C_ops.transpose(reshape_300, [0, 1, 3, 2, 4, 5]) + del reshape_300 + + # pd_op.reshape: (8x14x14x768xf32) <- (8x2x7x2x7x768xf32, 4xi64) + reshape_130 = paddle._C_ops.reshape(transpose_90, full_int_array_62) + + # pd_op.reshape: (8x196x768xf32) <- (8x14x14x768xf32, 3xi64) + reshape_131 = paddle._C_ops.reshape(reshape_130, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_15 = paddle._C_ops.full( + [], + float("0.93913"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_162 = full_15 + + # pd_op.uniform: (8x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_26 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (8x1x1xf32) <- (xf32, 8x1x1xf32) + add_204 = paddle._C_ops.add(full_15, uniform_26) + del uniform_26 + + # pd_op.floor: (8x1x1xf32) <- (8x1x1xf32) + floor_26 = paddle._C_ops.floor(add_204) + del add_204 + + # pd_op.divide: (8x196x768xf32) <- (8x196x768xf32, xf32) + divide_26 = paddle._C_ops.divide(reshape_131, full_15) + + # pd_op.multiply: (8x196x768xf32) <- (8x196x768xf32, 8x1x1xf32) + multiply_26 = paddle._C_ops.multiply(divide_26, floor_26) + + # pd_op.add: (8x196x768xf32) <- (8x196x768xf32, 8x196x768xf32) + add_101 = paddle._C_ops.add(add_98, multiply_26) + + # pd_op.layer_norm: (8x196x768xf32, 8x196xf32, 8x196xf32) <- (8x196x768xf32, 768xf32, 768xf32) + layer_norm_96, layer_norm_97, layer_norm_98 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_101, parameter_120, parameter_119, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_119, parameter_120 + + # pd_op.matmul: (8x196x3072xf32) <- (8x196x768xf32, 768x3072xf32) + matmul_75 = paddle._C_ops.matmul(layer_norm_96, parameter_118, False, False) + del parameter_118 + + # pd_op.add: (8x196x3072xf32) <- (8x196x3072xf32, 3072xf32) + add_102 = paddle._C_ops.add(matmul_75, parameter_117) + del parameter_117 + + # pd_op.gelu: (8x196x3072xf32) <- (8x196x3072xf32) + gelu_14 = paddle._C_ops.gelu(add_102, False) + + # pd_op.matmul: (8x196x768xf32) <- (8x196x3072xf32, 3072x768xf32) + matmul_76 = paddle._C_ops.matmul(gelu_14, parameter_116, False, False) + del parameter_116 + + # pd_op.add: (8x196x768xf32) <- (8x196x768xf32, 768xf32) + add_103 = paddle._C_ops.add(matmul_76, parameter_115) + del parameter_115 + + # pd_op.uniform: (8x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_27 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (8x1x1xf32) <- (xf32, 8x1x1xf32) + add_205 = paddle._C_ops.add(full_15, uniform_27) + del uniform_27 + + # pd_op.floor: (8x1x1xf32) <- (8x1x1xf32) + floor_27 = paddle._C_ops.floor(add_205) + del add_205 + + # pd_op.divide: (8x196x768xf32) <- (8x196x768xf32, xf32) + divide_27 = paddle._C_ops.divide(add_103, full_15) + + # pd_op.multiply: (8x196x768xf32) <- (8x196x768xf32, 8x1x1xf32) + multiply_27 = paddle._C_ops.multiply(divide_27, floor_27) + + # pd_op.add: (8x196x768xf32) <- (8x196x768xf32, 8x196x768xf32) + add_104 = paddle._C_ops.add(add_101, multiply_27) + + # pd_op.layer_norm: (8x196x768xf32, 8x196xf32, 8x196xf32) <- (8x196x768xf32, 768xf32, 768xf32) + layer_norm_99, layer_norm_100, layer_norm_101 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_104, parameter_114, parameter_113, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_113, parameter_114 + + # pd_op.reshape: (8x14x14x768xf32) <- (8x196x768xf32, 4xi64) + reshape_132 = paddle._C_ops.reshape(layer_norm_99, full_int_array_55) + + # pd_op.roll: (8x14x14x768xf32) <- (8x14x14x768xf32, 2xi64) + roll_14 = paddle._C_ops.roll(reshape_132, full_int_array_4, [1, 2]) + + # pd_op.reshape: (8x2x7x2x7x768xf32) <- (8x14x14x768xf32, 6xi64) + reshape_301 = paddle._C_ops.reshape(roll_14, full_int_array_56) + + # pd_op.transpose: (8x2x2x7x7x768xf32) <- (8x2x7x2x7x768xf32) + transpose_91 = paddle._C_ops.transpose(reshape_301, [0, 1, 3, 2, 4, 5]) + del reshape_301 + + # pd_op.reshape: (32x7x7x768xf32) <- (8x2x2x7x7x768xf32, 4xi64) + reshape_133 = paddle._C_ops.reshape(transpose_91, full_int_array_57) + + # pd_op.reshape: (32x49x768xf32) <- (32x7x7x768xf32, 3xi64) + reshape_134 = paddle._C_ops.reshape(reshape_133, full_int_array_58) + + # pd_op.full: (1x14x14x1xf32) <- () + full_41 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__68 = paddle._C_ops.set_value_( + full_41, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_41 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__69 = paddle._C_ops.set_value_( + set_value__68, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__68 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__70 = paddle._C_ops.set_value_( + set_value__69, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__69 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__71 = paddle._C_ops.set_value_( + set_value__70, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__70 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__72 = paddle._C_ops.set_value_( + set_value__71, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__71 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__73 = paddle._C_ops.set_value_( + set_value__72, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__72 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__74 = paddle._C_ops.set_value_( + set_value__73, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__73 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__75 = paddle._C_ops.set_value_( + set_value__74, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__74 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__7 = paddle._C_ops.set_value_( + set_value__75, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__75 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_302 = paddle._C_ops.reshape(set_value__7, full_int_array_64) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_153 = paddle._C_ops.transpose(reshape_302, [0, 1, 3, 2, 4, 5]) + del reshape_302 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_303 = paddle._C_ops.reshape(transpose_153, full_int_array_36) + del transpose_153 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_304 = paddle._C_ops.reshape(reshape_303, full_int_array_37) + del reshape_303 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_58 = paddle._C_ops.unsqueeze(reshape_304, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_59 = paddle._C_ops.unsqueeze(reshape_304, full_int_array_2) + del reshape_304 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_7 = paddle._C_ops.subtract(unsqueeze_58, unsqueeze_59) + del unsqueeze_58, unsqueeze_59 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_7 = paddle._C_ops.not_equal(subtract_7, full_26) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_14 = paddle._C_ops.where(not_equal_7, full_35, subtract_7) + del not_equal_7, subtract_7 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_7 = paddle._C_ops.equal(where_14, full_26) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_15 = paddle._C_ops.where(equal_7, full_36, where_14) + del equal_7, where_14 + + # pd_op.matmul: (32x49x2304xf32) <- (32x49x768xf32, 768x2304xf32) + matmul_77 = paddle._C_ops.matmul(reshape_134, parameter_112, False, False) + del parameter_112 + + # pd_op.add: (32x49x2304xf32) <- (32x49x2304xf32, 2304xf32) + add_105 = paddle._C_ops.add(matmul_77, parameter_111) + del parameter_111 + + # pd_op.reshape: (32x49x3x24x32xf32) <- (32x49x2304xf32, 5xi64) + reshape_305 = paddle._C_ops.reshape(add_105, full_int_array_59) + + # pd_op.transpose: (3x32x24x49x32xf32) <- (32x49x3x24x32xf32) + transpose_92 = paddle._C_ops.transpose(reshape_305, [2, 0, 3, 1, 4]) + del reshape_305 + + # pd_op.slice: (32x24x49x32xf32) <- (3x32x24x49x32xf32, 1xi64, 1xi64) + slice_54 = paddle._C_ops.slice( + transpose_92, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (32x24x49x32xf32) <- (3x32x24x49x32xf32, 1xi64, 1xi64) + slice_55 = paddle._C_ops.slice( + transpose_92, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (32x24x49x32xf32) <- (3x32x24x49x32xf32, 1xi64, 1xi64) + slice_15 = paddle._C_ops.slice( + transpose_92, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (32x24x49x32xf32) <- (32x24x49x32xf32, 1xf32) + scale_15 = paddle._C_ops.scale(slice_54, full_0, float("0"), True) + del slice_54 + + # pd_op.transpose: (32x24x32x49xf32) <- (32x24x49x32xf32) + transpose_93 = paddle._C_ops.transpose(slice_55, [0, 1, 3, 2]) + del slice_55 + + # pd_op.matmul: (32x24x49x49xf32) <- (32x24x49x32xf32, 32x24x32x49xf32) + matmul_78 = paddle._C_ops.matmul(scale_15, transpose_93, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_135 = paddle._C_ops.reshape(data_3, full_int_array_15) + del data_3 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_15 = paddle._C_ops.index_select(data_4, reshape_135, 0) + del data_4 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_306 = paddle._C_ops.reshape(index_select_15, full_int_array_16) + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_94 = paddle._C_ops.transpose(reshape_306, [2, 0, 1]) + del reshape_306 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_22 = paddle._C_ops.unsqueeze(transpose_94, full_int_array_0) + + # pd_op.add: (32x24x49x49xf32) <- (32x24x49x49xf32, 1x24x49x49xf32) + add_106 = paddle._C_ops.add(matmul_78, unsqueeze_22) + + # pd_op.reshape: (8x4x24x49x49xf32) <- (32x24x49x49xf32, 5xi64) + reshape_136 = paddle._C_ops.reshape(add_106, full_int_array_65) + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_60 = paddle._C_ops.unsqueeze(where_15, full_int_array_1) + del where_15 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_23 = paddle._C_ops.unsqueeze(unsqueeze_60, full_int_array_0) + del unsqueeze_60 + + # pd_op.add: (8x4x24x49x49xf32) <- (8x4x24x49x49xf32, 1x4x1x49x49xf32) + add_107 = paddle._C_ops.add(reshape_136, unsqueeze_23) + + # pd_op.reshape: (32x24x49x49xf32) <- (8x4x24x49x49xf32, 4xi64) + reshape_307 = paddle._C_ops.reshape(add_107, full_int_array_66) + + # pd_op.softmax: (32x24x49x49xf32) <- (32x24x49x49xf32) + softmax_15 = paddle._C_ops.softmax(reshape_307, -1) + del reshape_307 + + # pd_op.matmul: (32x24x49x32xf32) <- (32x24x49x49xf32, 32x24x49x32xf32) + matmul_139 = paddle._C_ops.matmul(softmax_15, slice_15, False, False) + + # pd_op.transpose: (32x49x24x32xf32) <- (32x24x49x32xf32) + transpose_95 = paddle._C_ops.transpose(matmul_139, [0, 2, 1, 3]) + del matmul_139 + + # pd_op.reshape: (32x49x768xf32) <- (32x49x24x32xf32, 3xi64) + reshape_137 = paddle._C_ops.reshape(transpose_95, full_int_array_60) + + # pd_op.matmul: (32x49x768xf32) <- (32x49x768xf32, 768x768xf32) + matmul_79 = paddle._C_ops.matmul(reshape_137, parameter_110, False, False) + del parameter_110 + + # pd_op.add: (32x49x768xf32) <- (32x49x768xf32, 768xf32) + add_108 = paddle._C_ops.add(matmul_79, parameter_109) + del parameter_109 + + # pd_op.reshape: (32x7x7x768xf32) <- (32x49x768xf32, 4xi64) + reshape_138 = paddle._C_ops.reshape(add_108, full_int_array_57) + + # pd_op.reshape: (8x2x2x7x7x768xf32) <- (32x7x7x768xf32, 6xi64) + reshape_308 = paddle._C_ops.reshape(reshape_138, full_int_array_61) + + # pd_op.transpose: (8x2x7x2x7x768xf32) <- (8x2x2x7x7x768xf32) + transpose_96 = paddle._C_ops.transpose(reshape_308, [0, 1, 3, 2, 4, 5]) + del reshape_308 + + # pd_op.reshape: (8x14x14x768xf32) <- (8x2x7x2x7x768xf32, 4xi64) + reshape_139 = paddle._C_ops.reshape(transpose_96, full_int_array_62) + + # pd_op.roll: (8x14x14x768xf32) <- (8x14x14x768xf32, 2xi64) + roll_15 = paddle._C_ops.roll(reshape_139, full_int_array_5, [1, 2]) + + # pd_op.reshape: (8x196x768xf32) <- (8x14x14x768xf32, 3xi64) + reshape_140 = paddle._C_ops.reshape(roll_15, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_16 = paddle._C_ops.full( + [], + float("0.934783"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_173 = full_16 + + # pd_op.uniform: (8x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_28 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (8x1x1xf32) <- (xf32, 8x1x1xf32) + add_206 = paddle._C_ops.add(full_16, uniform_28) + del uniform_28 + + # pd_op.floor: (8x1x1xf32) <- (8x1x1xf32) + floor_28 = paddle._C_ops.floor(add_206) + del add_206 + + # pd_op.divide: (8x196x768xf32) <- (8x196x768xf32, xf32) + divide_28 = paddle._C_ops.divide(reshape_140, full_16) + + # pd_op.multiply: (8x196x768xf32) <- (8x196x768xf32, 8x1x1xf32) + multiply_28 = paddle._C_ops.multiply(divide_28, floor_28) + + # pd_op.add: (8x196x768xf32) <- (8x196x768xf32, 8x196x768xf32) + add_109 = paddle._C_ops.add(add_104, multiply_28) + + # pd_op.layer_norm: (8x196x768xf32, 8x196xf32, 8x196xf32) <- (8x196x768xf32, 768xf32, 768xf32) + layer_norm_102, layer_norm_103, layer_norm_104 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_109, parameter_108, parameter_107, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_107, parameter_108 + + # pd_op.matmul: (8x196x3072xf32) <- (8x196x768xf32, 768x3072xf32) + matmul_80 = paddle._C_ops.matmul(layer_norm_102, parameter_106, False, False) + del parameter_106 + + # pd_op.add: (8x196x3072xf32) <- (8x196x3072xf32, 3072xf32) + add_110 = paddle._C_ops.add(matmul_80, parameter_105) + del parameter_105 + + # pd_op.gelu: (8x196x3072xf32) <- (8x196x3072xf32) + gelu_15 = paddle._C_ops.gelu(add_110, False) + + # pd_op.matmul: (8x196x768xf32) <- (8x196x3072xf32, 3072x768xf32) + matmul_81 = paddle._C_ops.matmul(gelu_15, parameter_104, False, False) + del parameter_104 + + # pd_op.add: (8x196x768xf32) <- (8x196x768xf32, 768xf32) + add_111 = paddle._C_ops.add(matmul_81, parameter_103) + del parameter_103 + + # pd_op.uniform: (8x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_29 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (8x1x1xf32) <- (xf32, 8x1x1xf32) + add_207 = paddle._C_ops.add(full_16, uniform_29) + del uniform_29 + + # pd_op.floor: (8x1x1xf32) <- (8x1x1xf32) + floor_29 = paddle._C_ops.floor(add_207) + del add_207 + + # pd_op.divide: (8x196x768xf32) <- (8x196x768xf32, xf32) + divide_29 = paddle._C_ops.divide(add_111, full_16) + + # pd_op.multiply: (8x196x768xf32) <- (8x196x768xf32, 8x1x1xf32) + multiply_29 = paddle._C_ops.multiply(divide_29, floor_29) + + # pd_op.add: (8x196x768xf32) <- (8x196x768xf32, 8x196x768xf32) + add_112 = paddle._C_ops.add(add_109, multiply_29) + + # pd_op.layer_norm: (8x196x768xf32, 8x196xf32, 8x196xf32) <- (8x196x768xf32, 768xf32, 768xf32) + layer_norm_105, layer_norm_106, layer_norm_107 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_112, parameter_102, parameter_101, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_101, parameter_102 + + # pd_op.reshape: (8x14x14x768xf32) <- (8x196x768xf32, 4xi64) + reshape_141 = paddle._C_ops.reshape(layer_norm_105, full_int_array_55) + + # pd_op.reshape: (8x2x7x2x7x768xf32) <- (8x14x14x768xf32, 6xi64) + reshape_309 = paddle._C_ops.reshape(reshape_141, full_int_array_56) + + # pd_op.transpose: (8x2x2x7x7x768xf32) <- (8x2x7x2x7x768xf32) + transpose_97 = paddle._C_ops.transpose(reshape_309, [0, 1, 3, 2, 4, 5]) + del reshape_309 + + # pd_op.reshape: (32x7x7x768xf32) <- (8x2x2x7x7x768xf32, 4xi64) + reshape_142 = paddle._C_ops.reshape(transpose_97, full_int_array_57) + + # pd_op.reshape: (32x49x768xf32) <- (32x7x7x768xf32, 3xi64) + reshape_143 = paddle._C_ops.reshape(reshape_142, full_int_array_58) + + # pd_op.matmul: (32x49x2304xf32) <- (32x49x768xf32, 768x2304xf32) + matmul_82 = paddle._C_ops.matmul(reshape_143, parameter_100, False, False) + del parameter_100 + + # pd_op.add: (32x49x2304xf32) <- (32x49x2304xf32, 2304xf32) + add_113 = paddle._C_ops.add(matmul_82, parameter_99) + del parameter_99 + + # pd_op.reshape: (32x49x3x24x32xf32) <- (32x49x2304xf32, 5xi64) + reshape_310 = paddle._C_ops.reshape(add_113, full_int_array_59) + + # pd_op.transpose: (3x32x24x49x32xf32) <- (32x49x3x24x32xf32) + transpose_98 = paddle._C_ops.transpose(reshape_310, [2, 0, 3, 1, 4]) + del reshape_310 + + # pd_op.slice: (32x24x49x32xf32) <- (3x32x24x49x32xf32, 1xi64, 1xi64) + slice_56 = paddle._C_ops.slice( + transpose_98, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (32x24x49x32xf32) <- (3x32x24x49x32xf32, 1xi64, 1xi64) + slice_57 = paddle._C_ops.slice( + transpose_98, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (32x24x49x32xf32) <- (3x32x24x49x32xf32, 1xi64, 1xi64) + slice_16 = paddle._C_ops.slice( + transpose_98, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (32x24x49x32xf32) <- (32x24x49x32xf32, 1xf32) + scale_16 = paddle._C_ops.scale(slice_56, full_0, float("0"), True) + del slice_56 + + # pd_op.transpose: (32x24x32x49xf32) <- (32x24x49x32xf32) + transpose_99 = paddle._C_ops.transpose(slice_57, [0, 1, 3, 2]) + del slice_57 + + # pd_op.matmul: (32x24x49x49xf32) <- (32x24x49x32xf32, 32x24x32x49xf32) + matmul_83 = paddle._C_ops.matmul(scale_16, transpose_99, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_144 = paddle._C_ops.reshape(data_5, full_int_array_15) + del data_5 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_16 = paddle._C_ops.index_select(data_6, reshape_144, 0) + del data_6 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_311 = paddle._C_ops.reshape(index_select_16, full_int_array_16) + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_100 = paddle._C_ops.transpose(reshape_311, [2, 0, 1]) + del reshape_311 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_24 = paddle._C_ops.unsqueeze(transpose_100, full_int_array_0) + + # pd_op.add: (32x24x49x49xf32) <- (32x24x49x49xf32, 1x24x49x49xf32) + add_208 = paddle._C_ops.add(matmul_83, unsqueeze_24) + + # pd_op.softmax: (32x24x49x49xf32) <- (32x24x49x49xf32) + softmax_16 = paddle._C_ops.softmax(add_208, -1) + del add_208 + + # pd_op.matmul: (32x24x49x32xf32) <- (32x24x49x49xf32, 32x24x49x32xf32) + matmul_140 = paddle._C_ops.matmul(softmax_16, slice_16, False, False) + + # pd_op.transpose: (32x49x24x32xf32) <- (32x24x49x32xf32) + transpose_101 = paddle._C_ops.transpose(matmul_140, [0, 2, 1, 3]) + del matmul_140 + + # pd_op.reshape: (32x49x768xf32) <- (32x49x24x32xf32, 3xi64) + reshape_145 = paddle._C_ops.reshape(transpose_101, full_int_array_60) + + # pd_op.matmul: (32x49x768xf32) <- (32x49x768xf32, 768x768xf32) + matmul_84 = paddle._C_ops.matmul(reshape_145, parameter_98, False, False) + del parameter_98 + + # pd_op.add: (32x49x768xf32) <- (32x49x768xf32, 768xf32) + add_114 = paddle._C_ops.add(matmul_84, parameter_97) + del parameter_97 + + # pd_op.reshape: (32x7x7x768xf32) <- (32x49x768xf32, 4xi64) + reshape_146 = paddle._C_ops.reshape(add_114, full_int_array_57) + + # pd_op.reshape: (8x2x2x7x7x768xf32) <- (32x7x7x768xf32, 6xi64) + reshape_312 = paddle._C_ops.reshape(reshape_146, full_int_array_61) + + # pd_op.transpose: (8x2x7x2x7x768xf32) <- (8x2x2x7x7x768xf32) + transpose_102 = paddle._C_ops.transpose(reshape_312, [0, 1, 3, 2, 4, 5]) + del reshape_312 + + # pd_op.reshape: (8x14x14x768xf32) <- (8x2x7x2x7x768xf32, 4xi64) + reshape_147 = paddle._C_ops.reshape(transpose_102, full_int_array_62) + + # pd_op.reshape: (8x196x768xf32) <- (8x14x14x768xf32, 3xi64) + reshape_148 = paddle._C_ops.reshape(reshape_147, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_17 = paddle._C_ops.full( + [], + float("0.930435"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_182 = full_17 + + # pd_op.uniform: (8x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_30 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (8x1x1xf32) <- (xf32, 8x1x1xf32) + add_209 = paddle._C_ops.add(full_17, uniform_30) + del uniform_30 + + # pd_op.floor: (8x1x1xf32) <- (8x1x1xf32) + floor_30 = paddle._C_ops.floor(add_209) + del add_209 + + # pd_op.divide: (8x196x768xf32) <- (8x196x768xf32, xf32) + divide_30 = paddle._C_ops.divide(reshape_148, full_17) + + # pd_op.multiply: (8x196x768xf32) <- (8x196x768xf32, 8x1x1xf32) + multiply_30 = paddle._C_ops.multiply(divide_30, floor_30) + + # pd_op.add: (8x196x768xf32) <- (8x196x768xf32, 8x196x768xf32) + add_115 = paddle._C_ops.add(add_112, multiply_30) + + # pd_op.layer_norm: (8x196x768xf32, 8x196xf32, 8x196xf32) <- (8x196x768xf32, 768xf32, 768xf32) + layer_norm_108, layer_norm_109, layer_norm_110 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_115, parameter_96, parameter_95, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_95, parameter_96 + + # pd_op.matmul: (8x196x3072xf32) <- (8x196x768xf32, 768x3072xf32) + matmul_85 = paddle._C_ops.matmul(layer_norm_108, parameter_94, False, False) + del parameter_94 + + # pd_op.add: (8x196x3072xf32) <- (8x196x3072xf32, 3072xf32) + add_116 = paddle._C_ops.add(matmul_85, parameter_93) + del parameter_93 + + # pd_op.gelu: (8x196x3072xf32) <- (8x196x3072xf32) + gelu_16 = paddle._C_ops.gelu(add_116, False) + + # pd_op.matmul: (8x196x768xf32) <- (8x196x3072xf32, 3072x768xf32) + matmul_86 = paddle._C_ops.matmul(gelu_16, parameter_92, False, False) + del parameter_92 + + # pd_op.add: (8x196x768xf32) <- (8x196x768xf32, 768xf32) + add_117 = paddle._C_ops.add(matmul_86, parameter_91) + del parameter_91 + + # pd_op.uniform: (8x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_31 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (8x1x1xf32) <- (xf32, 8x1x1xf32) + add_210 = paddle._C_ops.add(full_17, uniform_31) + del uniform_31 + + # pd_op.floor: (8x1x1xf32) <- (8x1x1xf32) + floor_31 = paddle._C_ops.floor(add_210) + del add_210 + + # pd_op.divide: (8x196x768xf32) <- (8x196x768xf32, xf32) + divide_31 = paddle._C_ops.divide(add_117, full_17) + + # pd_op.multiply: (8x196x768xf32) <- (8x196x768xf32, 8x1x1xf32) + multiply_31 = paddle._C_ops.multiply(divide_31, floor_31) + + # pd_op.add: (8x196x768xf32) <- (8x196x768xf32, 8x196x768xf32) + add_118 = paddle._C_ops.add(add_115, multiply_31) + + # pd_op.layer_norm: (8x196x768xf32, 8x196xf32, 8x196xf32) <- (8x196x768xf32, 768xf32, 768xf32) + layer_norm_111, layer_norm_112, layer_norm_113 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_118, parameter_90, parameter_89, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_89, parameter_90 + + # pd_op.reshape: (8x14x14x768xf32) <- (8x196x768xf32, 4xi64) + reshape_149 = paddle._C_ops.reshape(layer_norm_111, full_int_array_55) + + # pd_op.roll: (8x14x14x768xf32) <- (8x14x14x768xf32, 2xi64) + roll_16 = paddle._C_ops.roll(reshape_149, full_int_array_4, [1, 2]) + + # pd_op.reshape: (8x2x7x2x7x768xf32) <- (8x14x14x768xf32, 6xi64) + reshape_313 = paddle._C_ops.reshape(roll_16, full_int_array_56) + + # pd_op.transpose: (8x2x2x7x7x768xf32) <- (8x2x7x2x7x768xf32) + transpose_103 = paddle._C_ops.transpose(reshape_313, [0, 1, 3, 2, 4, 5]) + del reshape_313 + + # pd_op.reshape: (32x7x7x768xf32) <- (8x2x2x7x7x768xf32, 4xi64) + reshape_150 = paddle._C_ops.reshape(transpose_103, full_int_array_57) + + # pd_op.reshape: (32x49x768xf32) <- (32x7x7x768xf32, 3xi64) + reshape_151 = paddle._C_ops.reshape(reshape_150, full_int_array_58) + + # pd_op.full: (1x14x14x1xf32) <- () + full_42 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__76 = paddle._C_ops.set_value_( + full_42, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_42 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__77 = paddle._C_ops.set_value_( + set_value__76, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__76 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__78 = paddle._C_ops.set_value_( + set_value__77, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__77 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__79 = paddle._C_ops.set_value_( + set_value__78, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__78 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__80 = paddle._C_ops.set_value_( + set_value__79, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__79 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__81 = paddle._C_ops.set_value_( + set_value__80, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__80 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__82 = paddle._C_ops.set_value_( + set_value__81, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__81 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__83 = paddle._C_ops.set_value_( + set_value__82, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__82 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__8 = paddle._C_ops.set_value_( + set_value__83, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__83 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_314 = paddle._C_ops.reshape(set_value__8, full_int_array_64) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_154 = paddle._C_ops.transpose(reshape_314, [0, 1, 3, 2, 4, 5]) + del reshape_314 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_315 = paddle._C_ops.reshape(transpose_154, full_int_array_36) + del transpose_154 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_316 = paddle._C_ops.reshape(reshape_315, full_int_array_37) + del reshape_315 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_61 = paddle._C_ops.unsqueeze(reshape_316, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_62 = paddle._C_ops.unsqueeze(reshape_316, full_int_array_2) + del reshape_316 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_8 = paddle._C_ops.subtract(unsqueeze_61, unsqueeze_62) + del unsqueeze_61, unsqueeze_62 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_8 = paddle._C_ops.not_equal(subtract_8, full_26) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_16 = paddle._C_ops.where(not_equal_8, full_35, subtract_8) + del not_equal_8, subtract_8 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_8 = paddle._C_ops.equal(where_16, full_26) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_17 = paddle._C_ops.where(equal_8, full_36, where_16) + del equal_8, where_16 + + # pd_op.matmul: (32x49x2304xf32) <- (32x49x768xf32, 768x2304xf32) + matmul_87 = paddle._C_ops.matmul(reshape_151, parameter_88, False, False) + del parameter_88 + + # pd_op.add: (32x49x2304xf32) <- (32x49x2304xf32, 2304xf32) + add_119 = paddle._C_ops.add(matmul_87, parameter_87) + del parameter_87 + + # pd_op.reshape: (32x49x3x24x32xf32) <- (32x49x2304xf32, 5xi64) + reshape_317 = paddle._C_ops.reshape(add_119, full_int_array_59) + + # pd_op.transpose: (3x32x24x49x32xf32) <- (32x49x3x24x32xf32) + transpose_104 = paddle._C_ops.transpose(reshape_317, [2, 0, 3, 1, 4]) + del reshape_317 + + # pd_op.slice: (32x24x49x32xf32) <- (3x32x24x49x32xf32, 1xi64, 1xi64) + slice_58 = paddle._C_ops.slice( + transpose_104, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (32x24x49x32xf32) <- (3x32x24x49x32xf32, 1xi64, 1xi64) + slice_59 = paddle._C_ops.slice( + transpose_104, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (32x24x49x32xf32) <- (3x32x24x49x32xf32, 1xi64, 1xi64) + slice_17 = paddle._C_ops.slice( + transpose_104, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (32x24x49x32xf32) <- (32x24x49x32xf32, 1xf32) + scale_17 = paddle._C_ops.scale(slice_58, full_0, float("0"), True) + del slice_58 + + # pd_op.transpose: (32x24x32x49xf32) <- (32x24x49x32xf32) + transpose_105 = paddle._C_ops.transpose(slice_59, [0, 1, 3, 2]) + del slice_59 + + # pd_op.matmul: (32x24x49x49xf32) <- (32x24x49x32xf32, 32x24x32x49xf32) + matmul_88 = paddle._C_ops.matmul(scale_17, transpose_105, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_152 = paddle._C_ops.reshape(data_7, full_int_array_15) + del data_7 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_17 = paddle._C_ops.index_select(data_8, reshape_152, 0) + del data_8 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_318 = paddle._C_ops.reshape(index_select_17, full_int_array_16) + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_106 = paddle._C_ops.transpose(reshape_318, [2, 0, 1]) + del reshape_318 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_25 = paddle._C_ops.unsqueeze(transpose_106, full_int_array_0) + + # pd_op.add: (32x24x49x49xf32) <- (32x24x49x49xf32, 1x24x49x49xf32) + add_120 = paddle._C_ops.add(matmul_88, unsqueeze_25) + + # pd_op.reshape: (8x4x24x49x49xf32) <- (32x24x49x49xf32, 5xi64) + reshape_153 = paddle._C_ops.reshape(add_120, full_int_array_65) + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_63 = paddle._C_ops.unsqueeze(where_17, full_int_array_1) + del where_17 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_26 = paddle._C_ops.unsqueeze(unsqueeze_63, full_int_array_0) + del unsqueeze_63 + + # pd_op.add: (8x4x24x49x49xf32) <- (8x4x24x49x49xf32, 1x4x1x49x49xf32) + add_121 = paddle._C_ops.add(reshape_153, unsqueeze_26) + + # pd_op.reshape: (32x24x49x49xf32) <- (8x4x24x49x49xf32, 4xi64) + reshape_319 = paddle._C_ops.reshape(add_121, full_int_array_66) + + # pd_op.softmax: (32x24x49x49xf32) <- (32x24x49x49xf32) + softmax_17 = paddle._C_ops.softmax(reshape_319, -1) + del reshape_319 + + # pd_op.matmul: (32x24x49x32xf32) <- (32x24x49x49xf32, 32x24x49x32xf32) + matmul_141 = paddle._C_ops.matmul(softmax_17, slice_17, False, False) + + # pd_op.transpose: (32x49x24x32xf32) <- (32x24x49x32xf32) + transpose_107 = paddle._C_ops.transpose(matmul_141, [0, 2, 1, 3]) + del matmul_141 + + # pd_op.reshape: (32x49x768xf32) <- (32x49x24x32xf32, 3xi64) + reshape_154 = paddle._C_ops.reshape(transpose_107, full_int_array_60) + + # pd_op.matmul: (32x49x768xf32) <- (32x49x768xf32, 768x768xf32) + matmul_89 = paddle._C_ops.matmul(reshape_154, parameter_86, False, False) + del parameter_86 + + # pd_op.add: (32x49x768xf32) <- (32x49x768xf32, 768xf32) + add_122 = paddle._C_ops.add(matmul_89, parameter_85) + del parameter_85 + + # pd_op.reshape: (32x7x7x768xf32) <- (32x49x768xf32, 4xi64) + reshape_155 = paddle._C_ops.reshape(add_122, full_int_array_57) + + # pd_op.reshape: (8x2x2x7x7x768xf32) <- (32x7x7x768xf32, 6xi64) + reshape_320 = paddle._C_ops.reshape(reshape_155, full_int_array_61) + + # pd_op.transpose: (8x2x7x2x7x768xf32) <- (8x2x2x7x7x768xf32) + transpose_108 = paddle._C_ops.transpose(reshape_320, [0, 1, 3, 2, 4, 5]) + del reshape_320 + + # pd_op.reshape: (8x14x14x768xf32) <- (8x2x7x2x7x768xf32, 4xi64) + reshape_156 = paddle._C_ops.reshape(transpose_108, full_int_array_62) + + # pd_op.roll: (8x14x14x768xf32) <- (8x14x14x768xf32, 2xi64) + roll_17 = paddle._C_ops.roll(reshape_156, full_int_array_5, [1, 2]) + + # pd_op.reshape: (8x196x768xf32) <- (8x14x14x768xf32, 3xi64) + reshape_157 = paddle._C_ops.reshape(roll_17, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_18 = paddle._C_ops.full( + [], + float("0.926087"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_193 = full_18 + + # pd_op.uniform: (8x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_32 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (8x1x1xf32) <- (xf32, 8x1x1xf32) + add_211 = paddle._C_ops.add(full_18, uniform_32) + del uniform_32 + + # pd_op.floor: (8x1x1xf32) <- (8x1x1xf32) + floor_32 = paddle._C_ops.floor(add_211) + del add_211 + + # pd_op.divide: (8x196x768xf32) <- (8x196x768xf32, xf32) + divide_32 = paddle._C_ops.divide(reshape_157, full_18) + + # pd_op.multiply: (8x196x768xf32) <- (8x196x768xf32, 8x1x1xf32) + multiply_32 = paddle._C_ops.multiply(divide_32, floor_32) + + # pd_op.add: (8x196x768xf32) <- (8x196x768xf32, 8x196x768xf32) + add_123 = paddle._C_ops.add(add_118, multiply_32) + + # pd_op.layer_norm: (8x196x768xf32, 8x196xf32, 8x196xf32) <- (8x196x768xf32, 768xf32, 768xf32) + layer_norm_114, layer_norm_115, layer_norm_116 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_123, parameter_84, parameter_83, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_83, parameter_84 + + # pd_op.matmul: (8x196x3072xf32) <- (8x196x768xf32, 768x3072xf32) + matmul_90 = paddle._C_ops.matmul(layer_norm_114, parameter_82, False, False) + del parameter_82 + + # pd_op.add: (8x196x3072xf32) <- (8x196x3072xf32, 3072xf32) + add_124 = paddle._C_ops.add(matmul_90, parameter_81) + del parameter_81 + + # pd_op.gelu: (8x196x3072xf32) <- (8x196x3072xf32) + gelu_17 = paddle._C_ops.gelu(add_124, False) + + # pd_op.matmul: (8x196x768xf32) <- (8x196x3072xf32, 3072x768xf32) + matmul_91 = paddle._C_ops.matmul(gelu_17, parameter_80, False, False) + del parameter_80 + + # pd_op.add: (8x196x768xf32) <- (8x196x768xf32, 768xf32) + add_125 = paddle._C_ops.add(matmul_91, parameter_79) + del parameter_79 + + # pd_op.uniform: (8x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_33 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (8x1x1xf32) <- (xf32, 8x1x1xf32) + add_212 = paddle._C_ops.add(full_18, uniform_33) + del uniform_33 + + # pd_op.floor: (8x1x1xf32) <- (8x1x1xf32) + floor_33 = paddle._C_ops.floor(add_212) + del add_212 + + # pd_op.divide: (8x196x768xf32) <- (8x196x768xf32, xf32) + divide_33 = paddle._C_ops.divide(add_125, full_18) + + # pd_op.multiply: (8x196x768xf32) <- (8x196x768xf32, 8x1x1xf32) + multiply_33 = paddle._C_ops.multiply(divide_33, floor_33) + + # pd_op.add: (8x196x768xf32) <- (8x196x768xf32, 8x196x768xf32) + add_126 = paddle._C_ops.add(add_123, multiply_33) + + # pd_op.layer_norm: (8x196x768xf32, 8x196xf32, 8x196xf32) <- (8x196x768xf32, 768xf32, 768xf32) + layer_norm_117, layer_norm_118, layer_norm_119 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_126, parameter_78, parameter_77, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_77, parameter_78 + + # pd_op.reshape: (8x14x14x768xf32) <- (8x196x768xf32, 4xi64) + reshape_158 = paddle._C_ops.reshape(layer_norm_117, full_int_array_55) + + # pd_op.reshape: (8x2x7x2x7x768xf32) <- (8x14x14x768xf32, 6xi64) + reshape_321 = paddle._C_ops.reshape(reshape_158, full_int_array_56) + + # pd_op.transpose: (8x2x2x7x7x768xf32) <- (8x2x7x2x7x768xf32) + transpose_109 = paddle._C_ops.transpose(reshape_321, [0, 1, 3, 2, 4, 5]) + del reshape_321 + + # pd_op.reshape: (32x7x7x768xf32) <- (8x2x2x7x7x768xf32, 4xi64) + reshape_159 = paddle._C_ops.reshape(transpose_109, full_int_array_57) + + # pd_op.reshape: (32x49x768xf32) <- (32x7x7x768xf32, 3xi64) + reshape_160 = paddle._C_ops.reshape(reshape_159, full_int_array_58) + + # pd_op.matmul: (32x49x2304xf32) <- (32x49x768xf32, 768x2304xf32) + matmul_92 = paddle._C_ops.matmul(reshape_160, parameter_76, False, False) + del parameter_76 + + # pd_op.add: (32x49x2304xf32) <- (32x49x2304xf32, 2304xf32) + add_127 = paddle._C_ops.add(matmul_92, parameter_75) + del parameter_75 + + # pd_op.reshape: (32x49x3x24x32xf32) <- (32x49x2304xf32, 5xi64) + reshape_322 = paddle._C_ops.reshape(add_127, full_int_array_59) + + # pd_op.transpose: (3x32x24x49x32xf32) <- (32x49x3x24x32xf32) + transpose_110 = paddle._C_ops.transpose(reshape_322, [2, 0, 3, 1, 4]) + del reshape_322 + + # pd_op.slice: (32x24x49x32xf32) <- (3x32x24x49x32xf32, 1xi64, 1xi64) + slice_60 = paddle._C_ops.slice( + transpose_110, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (32x24x49x32xf32) <- (3x32x24x49x32xf32, 1xi64, 1xi64) + slice_61 = paddle._C_ops.slice( + transpose_110, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (32x24x49x32xf32) <- (3x32x24x49x32xf32, 1xi64, 1xi64) + slice_18 = paddle._C_ops.slice( + transpose_110, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (32x24x49x32xf32) <- (32x24x49x32xf32, 1xf32) + scale_18 = paddle._C_ops.scale(slice_60, full_0, float("0"), True) + del slice_60 + + # pd_op.transpose: (32x24x32x49xf32) <- (32x24x49x32xf32) + transpose_111 = paddle._C_ops.transpose(slice_61, [0, 1, 3, 2]) + del slice_61 + + # pd_op.matmul: (32x24x49x49xf32) <- (32x24x49x32xf32, 32x24x32x49xf32) + matmul_93 = paddle._C_ops.matmul(scale_18, transpose_111, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_161 = paddle._C_ops.reshape(data_9, full_int_array_15) + del data_9 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_18 = paddle._C_ops.index_select(data_10, reshape_161, 0) + del data_10 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_323 = paddle._C_ops.reshape(index_select_18, full_int_array_16) + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_112 = paddle._C_ops.transpose(reshape_323, [2, 0, 1]) + del reshape_323 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_27 = paddle._C_ops.unsqueeze(transpose_112, full_int_array_0) + + # pd_op.add: (32x24x49x49xf32) <- (32x24x49x49xf32, 1x24x49x49xf32) + add_213 = paddle._C_ops.add(matmul_93, unsqueeze_27) + + # pd_op.softmax: (32x24x49x49xf32) <- (32x24x49x49xf32) + softmax_18 = paddle._C_ops.softmax(add_213, -1) + del add_213 + + # pd_op.matmul: (32x24x49x32xf32) <- (32x24x49x49xf32, 32x24x49x32xf32) + matmul_142 = paddle._C_ops.matmul(softmax_18, slice_18, False, False) + + # pd_op.transpose: (32x49x24x32xf32) <- (32x24x49x32xf32) + transpose_113 = paddle._C_ops.transpose(matmul_142, [0, 2, 1, 3]) + del matmul_142 + + # pd_op.reshape: (32x49x768xf32) <- (32x49x24x32xf32, 3xi64) + reshape_162 = paddle._C_ops.reshape(transpose_113, full_int_array_60) + + # pd_op.matmul: (32x49x768xf32) <- (32x49x768xf32, 768x768xf32) + matmul_94 = paddle._C_ops.matmul(reshape_162, parameter_74, False, False) + del parameter_74 + + # pd_op.add: (32x49x768xf32) <- (32x49x768xf32, 768xf32) + add_128 = paddle._C_ops.add(matmul_94, parameter_73) + del parameter_73 + + # pd_op.reshape: (32x7x7x768xf32) <- (32x49x768xf32, 4xi64) + reshape_163 = paddle._C_ops.reshape(add_128, full_int_array_57) + + # pd_op.reshape: (8x2x2x7x7x768xf32) <- (32x7x7x768xf32, 6xi64) + reshape_324 = paddle._C_ops.reshape(reshape_163, full_int_array_61) + + # pd_op.transpose: (8x2x7x2x7x768xf32) <- (8x2x2x7x7x768xf32) + transpose_114 = paddle._C_ops.transpose(reshape_324, [0, 1, 3, 2, 4, 5]) + del reshape_324 + + # pd_op.reshape: (8x14x14x768xf32) <- (8x2x7x2x7x768xf32, 4xi64) + reshape_164 = paddle._C_ops.reshape(transpose_114, full_int_array_62) + + # pd_op.reshape: (8x196x768xf32) <- (8x14x14x768xf32, 3xi64) + reshape_165 = paddle._C_ops.reshape(reshape_164, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_19 = paddle._C_ops.full( + [], + float("0.921739"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_202 = full_19 + + # pd_op.uniform: (8x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_34 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (8x1x1xf32) <- (xf32, 8x1x1xf32) + add_214 = paddle._C_ops.add(full_19, uniform_34) + del uniform_34 + + # pd_op.floor: (8x1x1xf32) <- (8x1x1xf32) + floor_34 = paddle._C_ops.floor(add_214) + del add_214 + + # pd_op.divide: (8x196x768xf32) <- (8x196x768xf32, xf32) + divide_34 = paddle._C_ops.divide(reshape_165, full_19) + + # pd_op.multiply: (8x196x768xf32) <- (8x196x768xf32, 8x1x1xf32) + multiply_34 = paddle._C_ops.multiply(divide_34, floor_34) + + # pd_op.add: (8x196x768xf32) <- (8x196x768xf32, 8x196x768xf32) + add_129 = paddle._C_ops.add(add_126, multiply_34) + + # pd_op.layer_norm: (8x196x768xf32, 8x196xf32, 8x196xf32) <- (8x196x768xf32, 768xf32, 768xf32) + layer_norm_120, layer_norm_121, layer_norm_122 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_129, parameter_72, parameter_71, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_71, parameter_72 + + # pd_op.matmul: (8x196x3072xf32) <- (8x196x768xf32, 768x3072xf32) + matmul_95 = paddle._C_ops.matmul(layer_norm_120, parameter_70, False, False) + del parameter_70 + + # pd_op.add: (8x196x3072xf32) <- (8x196x3072xf32, 3072xf32) + add_130 = paddle._C_ops.add(matmul_95, parameter_69) + del parameter_69 + + # pd_op.gelu: (8x196x3072xf32) <- (8x196x3072xf32) + gelu_18 = paddle._C_ops.gelu(add_130, False) + + # pd_op.matmul: (8x196x768xf32) <- (8x196x3072xf32, 3072x768xf32) + matmul_96 = paddle._C_ops.matmul(gelu_18, parameter_68, False, False) + del parameter_68 + + # pd_op.add: (8x196x768xf32) <- (8x196x768xf32, 768xf32) + add_131 = paddle._C_ops.add(matmul_96, parameter_67) + del parameter_67 + + # pd_op.uniform: (8x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_35 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (8x1x1xf32) <- (xf32, 8x1x1xf32) + add_215 = paddle._C_ops.add(full_19, uniform_35) + del uniform_35 + + # pd_op.floor: (8x1x1xf32) <- (8x1x1xf32) + floor_35 = paddle._C_ops.floor(add_215) + del add_215 + + # pd_op.divide: (8x196x768xf32) <- (8x196x768xf32, xf32) + divide_35 = paddle._C_ops.divide(add_131, full_19) + + # pd_op.multiply: (8x196x768xf32) <- (8x196x768xf32, 8x1x1xf32) + multiply_35 = paddle._C_ops.multiply(divide_35, floor_35) + + # pd_op.add: (8x196x768xf32) <- (8x196x768xf32, 8x196x768xf32) + add_132 = paddle._C_ops.add(add_129, multiply_35) + + # pd_op.layer_norm: (8x196x768xf32, 8x196xf32, 8x196xf32) <- (8x196x768xf32, 768xf32, 768xf32) + layer_norm_123, layer_norm_124, layer_norm_125 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_132, parameter_66, parameter_65, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_65, parameter_66 + + # pd_op.reshape: (8x14x14x768xf32) <- (8x196x768xf32, 4xi64) + reshape_166 = paddle._C_ops.reshape(layer_norm_123, full_int_array_55) + + # pd_op.roll: (8x14x14x768xf32) <- (8x14x14x768xf32, 2xi64) + roll_18 = paddle._C_ops.roll(reshape_166, full_int_array_4, [1, 2]) + + # pd_op.reshape: (8x2x7x2x7x768xf32) <- (8x14x14x768xf32, 6xi64) + reshape_325 = paddle._C_ops.reshape(roll_18, full_int_array_56) + + # pd_op.transpose: (8x2x2x7x7x768xf32) <- (8x2x7x2x7x768xf32) + transpose_115 = paddle._C_ops.transpose(reshape_325, [0, 1, 3, 2, 4, 5]) + del reshape_325 + + # pd_op.reshape: (32x7x7x768xf32) <- (8x2x2x7x7x768xf32, 4xi64) + reshape_167 = paddle._C_ops.reshape(transpose_115, full_int_array_57) + + # pd_op.reshape: (32x49x768xf32) <- (32x7x7x768xf32, 3xi64) + reshape_168 = paddle._C_ops.reshape(reshape_167, full_int_array_58) + + # pd_op.full: (1x14x14x1xf32) <- () + full_43 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__84 = paddle._C_ops.set_value_( + full_43, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_43 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__85 = paddle._C_ops.set_value_( + set_value__84, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__84 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__86 = paddle._C_ops.set_value_( + set_value__85, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__85 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__87 = paddle._C_ops.set_value_( + set_value__86, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__86 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__88 = paddle._C_ops.set_value_( + set_value__87, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__87 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__89 = paddle._C_ops.set_value_( + set_value__88, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__88 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__90 = paddle._C_ops.set_value_( + set_value__89, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__89 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__91 = paddle._C_ops.set_value_( + set_value__90, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__90 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__9 = paddle._C_ops.set_value_( + set_value__91, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__91 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_326 = paddle._C_ops.reshape(set_value__9, full_int_array_64) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_155 = paddle._C_ops.transpose(reshape_326, [0, 1, 3, 2, 4, 5]) + del reshape_326 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_327 = paddle._C_ops.reshape(transpose_155, full_int_array_36) + del transpose_155 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_328 = paddle._C_ops.reshape(reshape_327, full_int_array_37) + del reshape_327 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_64 = paddle._C_ops.unsqueeze(reshape_328, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_65 = paddle._C_ops.unsqueeze(reshape_328, full_int_array_2) + del reshape_328 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_9 = paddle._C_ops.subtract(unsqueeze_64, unsqueeze_65) + del unsqueeze_64, unsqueeze_65 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_9 = paddle._C_ops.not_equal(subtract_9, full_26) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_18 = paddle._C_ops.where(not_equal_9, full_35, subtract_9) + del not_equal_9, subtract_9 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_9 = paddle._C_ops.equal(where_18, full_26) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_19 = paddle._C_ops.where(equal_9, full_36, where_18) + del equal_9, where_18 + + # pd_op.matmul: (32x49x2304xf32) <- (32x49x768xf32, 768x2304xf32) + matmul_97 = paddle._C_ops.matmul(reshape_168, parameter_64, False, False) + del parameter_64 + + # pd_op.add: (32x49x2304xf32) <- (32x49x2304xf32, 2304xf32) + add_133 = paddle._C_ops.add(matmul_97, parameter_63) + del parameter_63 + + # pd_op.reshape: (32x49x3x24x32xf32) <- (32x49x2304xf32, 5xi64) + reshape_329 = paddle._C_ops.reshape(add_133, full_int_array_59) + + # pd_op.transpose: (3x32x24x49x32xf32) <- (32x49x3x24x32xf32) + transpose_116 = paddle._C_ops.transpose(reshape_329, [2, 0, 3, 1, 4]) + del reshape_329 + + # pd_op.slice: (32x24x49x32xf32) <- (3x32x24x49x32xf32, 1xi64, 1xi64) + slice_62 = paddle._C_ops.slice( + transpose_116, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (32x24x49x32xf32) <- (3x32x24x49x32xf32, 1xi64, 1xi64) + slice_63 = paddle._C_ops.slice( + transpose_116, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (32x24x49x32xf32) <- (3x32x24x49x32xf32, 1xi64, 1xi64) + slice_19 = paddle._C_ops.slice( + transpose_116, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (32x24x49x32xf32) <- (32x24x49x32xf32, 1xf32) + scale_19 = paddle._C_ops.scale(slice_62, full_0, float("0"), True) + del slice_62 + + # pd_op.transpose: (32x24x32x49xf32) <- (32x24x49x32xf32) + transpose_117 = paddle._C_ops.transpose(slice_63, [0, 1, 3, 2]) + del slice_63 + + # pd_op.matmul: (32x24x49x49xf32) <- (32x24x49x32xf32, 32x24x32x49xf32) + matmul_98 = paddle._C_ops.matmul(scale_19, transpose_117, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_169 = paddle._C_ops.reshape(data_11, full_int_array_15) + del data_11 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_19 = paddle._C_ops.index_select(data_12, reshape_169, 0) + del data_12 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_330 = paddle._C_ops.reshape(index_select_19, full_int_array_16) + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_118 = paddle._C_ops.transpose(reshape_330, [2, 0, 1]) + del reshape_330 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_28 = paddle._C_ops.unsqueeze(transpose_118, full_int_array_0) + + # pd_op.add: (32x24x49x49xf32) <- (32x24x49x49xf32, 1x24x49x49xf32) + add_134 = paddle._C_ops.add(matmul_98, unsqueeze_28) + + # pd_op.reshape: (8x4x24x49x49xf32) <- (32x24x49x49xf32, 5xi64) + reshape_170 = paddle._C_ops.reshape(add_134, full_int_array_65) + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_66 = paddle._C_ops.unsqueeze(where_19, full_int_array_1) + del where_19 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_29 = paddle._C_ops.unsqueeze(unsqueeze_66, full_int_array_0) + del unsqueeze_66 + + # pd_op.add: (8x4x24x49x49xf32) <- (8x4x24x49x49xf32, 1x4x1x49x49xf32) + add_135 = paddle._C_ops.add(reshape_170, unsqueeze_29) + + # pd_op.reshape: (32x24x49x49xf32) <- (8x4x24x49x49xf32, 4xi64) + reshape_331 = paddle._C_ops.reshape(add_135, full_int_array_66) + + # pd_op.softmax: (32x24x49x49xf32) <- (32x24x49x49xf32) + softmax_19 = paddle._C_ops.softmax(reshape_331, -1) + del reshape_331 + + # pd_op.matmul: (32x24x49x32xf32) <- (32x24x49x49xf32, 32x24x49x32xf32) + matmul_143 = paddle._C_ops.matmul(softmax_19, slice_19, False, False) + + # pd_op.transpose: (32x49x24x32xf32) <- (32x24x49x32xf32) + transpose_119 = paddle._C_ops.transpose(matmul_143, [0, 2, 1, 3]) + del matmul_143 + + # pd_op.reshape: (32x49x768xf32) <- (32x49x24x32xf32, 3xi64) + reshape_171 = paddle._C_ops.reshape(transpose_119, full_int_array_60) + + # pd_op.matmul: (32x49x768xf32) <- (32x49x768xf32, 768x768xf32) + matmul_99 = paddle._C_ops.matmul(reshape_171, parameter_62, False, False) + del parameter_62 + + # pd_op.add: (32x49x768xf32) <- (32x49x768xf32, 768xf32) + add_136 = paddle._C_ops.add(matmul_99, parameter_61) + del parameter_61 + + # pd_op.reshape: (32x7x7x768xf32) <- (32x49x768xf32, 4xi64) + reshape_172 = paddle._C_ops.reshape(add_136, full_int_array_57) + + # pd_op.reshape: (8x2x2x7x7x768xf32) <- (32x7x7x768xf32, 6xi64) + reshape_332 = paddle._C_ops.reshape(reshape_172, full_int_array_61) + + # pd_op.transpose: (8x2x7x2x7x768xf32) <- (8x2x2x7x7x768xf32) + transpose_120 = paddle._C_ops.transpose(reshape_332, [0, 1, 3, 2, 4, 5]) + del reshape_332 + + # pd_op.reshape: (8x14x14x768xf32) <- (8x2x7x2x7x768xf32, 4xi64) + reshape_173 = paddle._C_ops.reshape(transpose_120, full_int_array_62) + + # pd_op.roll: (8x14x14x768xf32) <- (8x14x14x768xf32, 2xi64) + roll_19 = paddle._C_ops.roll(reshape_173, full_int_array_5, [1, 2]) + + # pd_op.reshape: (8x196x768xf32) <- (8x14x14x768xf32, 3xi64) + reshape_174 = paddle._C_ops.reshape(roll_19, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_20 = paddle._C_ops.full( + [], + float("0.917391"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_213 = full_20 + + # pd_op.uniform: (8x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_36 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (8x1x1xf32) <- (xf32, 8x1x1xf32) + add_216 = paddle._C_ops.add(full_20, uniform_36) + del uniform_36 + + # pd_op.floor: (8x1x1xf32) <- (8x1x1xf32) + floor_36 = paddle._C_ops.floor(add_216) + del add_216 + + # pd_op.divide: (8x196x768xf32) <- (8x196x768xf32, xf32) + divide_36 = paddle._C_ops.divide(reshape_174, full_20) + + # pd_op.multiply: (8x196x768xf32) <- (8x196x768xf32, 8x1x1xf32) + multiply_36 = paddle._C_ops.multiply(divide_36, floor_36) + + # pd_op.add: (8x196x768xf32) <- (8x196x768xf32, 8x196x768xf32) + add_137 = paddle._C_ops.add(add_132, multiply_36) + + # pd_op.layer_norm: (8x196x768xf32, 8x196xf32, 8x196xf32) <- (8x196x768xf32, 768xf32, 768xf32) + layer_norm_126, layer_norm_127, layer_norm_128 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_137, parameter_60, parameter_59, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_59, parameter_60 + + # pd_op.matmul: (8x196x3072xf32) <- (8x196x768xf32, 768x3072xf32) + matmul_100 = paddle._C_ops.matmul(layer_norm_126, parameter_58, False, False) + del parameter_58 + + # pd_op.add: (8x196x3072xf32) <- (8x196x3072xf32, 3072xf32) + add_138 = paddle._C_ops.add(matmul_100, parameter_57) + del parameter_57 + + # pd_op.gelu: (8x196x3072xf32) <- (8x196x3072xf32) + gelu_19 = paddle._C_ops.gelu(add_138, False) + + # pd_op.matmul: (8x196x768xf32) <- (8x196x3072xf32, 3072x768xf32) + matmul_101 = paddle._C_ops.matmul(gelu_19, parameter_56, False, False) + del parameter_56 + + # pd_op.add: (8x196x768xf32) <- (8x196x768xf32, 768xf32) + add_139 = paddle._C_ops.add(matmul_101, parameter_55) + del parameter_55 + + # pd_op.uniform: (8x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_37 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (8x1x1xf32) <- (xf32, 8x1x1xf32) + add_217 = paddle._C_ops.add(full_20, uniform_37) + del uniform_37 + + # pd_op.floor: (8x1x1xf32) <- (8x1x1xf32) + floor_37 = paddle._C_ops.floor(add_217) + del add_217 + + # pd_op.divide: (8x196x768xf32) <- (8x196x768xf32, xf32) + divide_37 = paddle._C_ops.divide(add_139, full_20) + + # pd_op.multiply: (8x196x768xf32) <- (8x196x768xf32, 8x1x1xf32) + multiply_37 = paddle._C_ops.multiply(divide_37, floor_37) + + # pd_op.add: (8x196x768xf32) <- (8x196x768xf32, 8x196x768xf32) + add_140 = paddle._C_ops.add(add_137, multiply_37) + + # pd_op.layer_norm: (8x196x768xf32, 8x196xf32, 8x196xf32) <- (8x196x768xf32, 768xf32, 768xf32) + layer_norm_129, layer_norm_130, layer_norm_131 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_140, parameter_54, parameter_53, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_53, parameter_54 + + # pd_op.reshape: (8x14x14x768xf32) <- (8x196x768xf32, 4xi64) + reshape_175 = paddle._C_ops.reshape(layer_norm_129, full_int_array_55) + + # pd_op.reshape: (8x2x7x2x7x768xf32) <- (8x14x14x768xf32, 6xi64) + reshape_333 = paddle._C_ops.reshape(reshape_175, full_int_array_56) + + # pd_op.transpose: (8x2x2x7x7x768xf32) <- (8x2x7x2x7x768xf32) + transpose_121 = paddle._C_ops.transpose(reshape_333, [0, 1, 3, 2, 4, 5]) + del reshape_333 + + # pd_op.reshape: (32x7x7x768xf32) <- (8x2x2x7x7x768xf32, 4xi64) + reshape_176 = paddle._C_ops.reshape(transpose_121, full_int_array_57) + + # pd_op.reshape: (32x49x768xf32) <- (32x7x7x768xf32, 3xi64) + reshape_177 = paddle._C_ops.reshape(reshape_176, full_int_array_58) + + # pd_op.matmul: (32x49x2304xf32) <- (32x49x768xf32, 768x2304xf32) + matmul_102 = paddle._C_ops.matmul(reshape_177, parameter_52, False, False) + del parameter_52 + + # pd_op.add: (32x49x2304xf32) <- (32x49x2304xf32, 2304xf32) + add_141 = paddle._C_ops.add(matmul_102, parameter_51) + del parameter_51 + + # pd_op.reshape: (32x49x3x24x32xf32) <- (32x49x2304xf32, 5xi64) + reshape_334 = paddle._C_ops.reshape(add_141, full_int_array_59) + + # pd_op.transpose: (3x32x24x49x32xf32) <- (32x49x3x24x32xf32) + transpose_122 = paddle._C_ops.transpose(reshape_334, [2, 0, 3, 1, 4]) + del reshape_334 + + # pd_op.slice: (32x24x49x32xf32) <- (3x32x24x49x32xf32, 1xi64, 1xi64) + slice_64 = paddle._C_ops.slice( + transpose_122, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (32x24x49x32xf32) <- (3x32x24x49x32xf32, 1xi64, 1xi64) + slice_65 = paddle._C_ops.slice( + transpose_122, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (32x24x49x32xf32) <- (3x32x24x49x32xf32, 1xi64, 1xi64) + slice_20 = paddle._C_ops.slice( + transpose_122, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (32x24x49x32xf32) <- (32x24x49x32xf32, 1xf32) + scale_20 = paddle._C_ops.scale(slice_64, full_0, float("0"), True) + del slice_64 + + # pd_op.transpose: (32x24x32x49xf32) <- (32x24x49x32xf32) + transpose_123 = paddle._C_ops.transpose(slice_65, [0, 1, 3, 2]) + del slice_65 + + # pd_op.matmul: (32x24x49x49xf32) <- (32x24x49x32xf32, 32x24x32x49xf32) + matmul_103 = paddle._C_ops.matmul(scale_20, transpose_123, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_178 = paddle._C_ops.reshape(data_13, full_int_array_15) + del data_13 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_20 = paddle._C_ops.index_select(data_14, reshape_178, 0) + del data_14 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_335 = paddle._C_ops.reshape(index_select_20, full_int_array_16) + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_124 = paddle._C_ops.transpose(reshape_335, [2, 0, 1]) + del reshape_335 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_30 = paddle._C_ops.unsqueeze(transpose_124, full_int_array_0) + + # pd_op.add: (32x24x49x49xf32) <- (32x24x49x49xf32, 1x24x49x49xf32) + add_218 = paddle._C_ops.add(matmul_103, unsqueeze_30) + + # pd_op.softmax: (32x24x49x49xf32) <- (32x24x49x49xf32) + softmax_20 = paddle._C_ops.softmax(add_218, -1) + del add_218 + + # pd_op.matmul: (32x24x49x32xf32) <- (32x24x49x49xf32, 32x24x49x32xf32) + matmul_144 = paddle._C_ops.matmul(softmax_20, slice_20, False, False) + + # pd_op.transpose: (32x49x24x32xf32) <- (32x24x49x32xf32) + transpose_125 = paddle._C_ops.transpose(matmul_144, [0, 2, 1, 3]) + del matmul_144 + + # pd_op.reshape: (32x49x768xf32) <- (32x49x24x32xf32, 3xi64) + reshape_179 = paddle._C_ops.reshape(transpose_125, full_int_array_60) + + # pd_op.matmul: (32x49x768xf32) <- (32x49x768xf32, 768x768xf32) + matmul_104 = paddle._C_ops.matmul(reshape_179, parameter_50, False, False) + del parameter_50 + + # pd_op.add: (32x49x768xf32) <- (32x49x768xf32, 768xf32) + add_142 = paddle._C_ops.add(matmul_104, parameter_49) + del parameter_49 + + # pd_op.reshape: (32x7x7x768xf32) <- (32x49x768xf32, 4xi64) + reshape_180 = paddle._C_ops.reshape(add_142, full_int_array_57) + + # pd_op.reshape: (8x2x2x7x7x768xf32) <- (32x7x7x768xf32, 6xi64) + reshape_336 = paddle._C_ops.reshape(reshape_180, full_int_array_61) + + # pd_op.transpose: (8x2x7x2x7x768xf32) <- (8x2x2x7x7x768xf32) + transpose_126 = paddle._C_ops.transpose(reshape_336, [0, 1, 3, 2, 4, 5]) + del reshape_336 + + # pd_op.reshape: (8x14x14x768xf32) <- (8x2x7x2x7x768xf32, 4xi64) + reshape_181 = paddle._C_ops.reshape(transpose_126, full_int_array_62) + + # pd_op.reshape: (8x196x768xf32) <- (8x14x14x768xf32, 3xi64) + reshape_182 = paddle._C_ops.reshape(reshape_181, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_21 = paddle._C_ops.full( + [], + float("0.913043"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_222 = full_21 + + # pd_op.uniform: (8x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_38 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (8x1x1xf32) <- (xf32, 8x1x1xf32) + add_219 = paddle._C_ops.add(full_21, uniform_38) + del uniform_38 + + # pd_op.floor: (8x1x1xf32) <- (8x1x1xf32) + floor_38 = paddle._C_ops.floor(add_219) + del add_219 + + # pd_op.divide: (8x196x768xf32) <- (8x196x768xf32, xf32) + divide_38 = paddle._C_ops.divide(reshape_182, full_21) + + # pd_op.multiply: (8x196x768xf32) <- (8x196x768xf32, 8x1x1xf32) + multiply_38 = paddle._C_ops.multiply(divide_38, floor_38) + + # pd_op.add: (8x196x768xf32) <- (8x196x768xf32, 8x196x768xf32) + add_143 = paddle._C_ops.add(add_140, multiply_38) + + # pd_op.layer_norm: (8x196x768xf32, 8x196xf32, 8x196xf32) <- (8x196x768xf32, 768xf32, 768xf32) + layer_norm_132, layer_norm_133, layer_norm_134 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_143, parameter_48, parameter_47, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_47, parameter_48 + + # pd_op.matmul: (8x196x3072xf32) <- (8x196x768xf32, 768x3072xf32) + matmul_105 = paddle._C_ops.matmul(layer_norm_132, parameter_46, False, False) + del parameter_46 + + # pd_op.add: (8x196x3072xf32) <- (8x196x3072xf32, 3072xf32) + add_144 = paddle._C_ops.add(matmul_105, parameter_45) + del parameter_45 + + # pd_op.gelu: (8x196x3072xf32) <- (8x196x3072xf32) + gelu_20 = paddle._C_ops.gelu(add_144, False) + + # pd_op.matmul: (8x196x768xf32) <- (8x196x3072xf32, 3072x768xf32) + matmul_106 = paddle._C_ops.matmul(gelu_20, parameter_44, False, False) + del parameter_44 + + # pd_op.add: (8x196x768xf32) <- (8x196x768xf32, 768xf32) + add_145 = paddle._C_ops.add(matmul_106, parameter_43) + del parameter_43 + + # pd_op.uniform: (8x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_39 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (8x1x1xf32) <- (xf32, 8x1x1xf32) + add_220 = paddle._C_ops.add(full_21, uniform_39) + del uniform_39 + + # pd_op.floor: (8x1x1xf32) <- (8x1x1xf32) + floor_39 = paddle._C_ops.floor(add_220) + del add_220 + + # pd_op.divide: (8x196x768xf32) <- (8x196x768xf32, xf32) + divide_39 = paddle._C_ops.divide(add_145, full_21) + + # pd_op.multiply: (8x196x768xf32) <- (8x196x768xf32, 8x1x1xf32) + multiply_39 = paddle._C_ops.multiply(divide_39, floor_39) + + # pd_op.add: (8x196x768xf32) <- (8x196x768xf32, 8x196x768xf32) + add_146 = paddle._C_ops.add(add_143, multiply_39) + + # pd_op.layer_norm: (8x196x768xf32, 8x196xf32, 8x196xf32) <- (8x196x768xf32, 768xf32, 768xf32) + layer_norm_135, layer_norm_136, layer_norm_137 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_146, parameter_42, parameter_41, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_41, parameter_42 + + # pd_op.reshape: (8x14x14x768xf32) <- (8x196x768xf32, 4xi64) + reshape_183 = paddle._C_ops.reshape(layer_norm_135, full_int_array_55) + + # pd_op.roll: (8x14x14x768xf32) <- (8x14x14x768xf32, 2xi64) + roll_20 = paddle._C_ops.roll(reshape_183, full_int_array_4, [1, 2]) + + # pd_op.reshape: (8x2x7x2x7x768xf32) <- (8x14x14x768xf32, 6xi64) + reshape_337 = paddle._C_ops.reshape(roll_20, full_int_array_56) + del full_int_array_56 + + # pd_op.transpose: (8x2x2x7x7x768xf32) <- (8x2x7x2x7x768xf32) + transpose_127 = paddle._C_ops.transpose(reshape_337, [0, 1, 3, 2, 4, 5]) + del reshape_337 + + # pd_op.reshape: (32x7x7x768xf32) <- (8x2x2x7x7x768xf32, 4xi64) + reshape_184 = paddle._C_ops.reshape(transpose_127, full_int_array_57) + + # pd_op.reshape: (32x49x768xf32) <- (32x7x7x768xf32, 3xi64) + reshape_185 = paddle._C_ops.reshape(reshape_184, full_int_array_58) + del full_int_array_58 + + # pd_op.full: (1x14x14x1xf32) <- () + full_44 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__92 = paddle._C_ops.set_value_( + full_44, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_44 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__93 = paddle._C_ops.set_value_( + set_value__92, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__92 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__94 = paddle._C_ops.set_value_( + set_value__93, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__93 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__95 = paddle._C_ops.set_value_( + set_value__94, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__94 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__96 = paddle._C_ops.set_value_( + set_value__95, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__95 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__97 = paddle._C_ops.set_value_( + set_value__96, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__96 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__98 = paddle._C_ops.set_value_( + set_value__97, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__97 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__99 = paddle._C_ops.set_value_( + set_value__98, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__98 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__10 = paddle._C_ops.set_value_( + set_value__99, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__99 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_338 = paddle._C_ops.reshape(set_value__10, full_int_array_64) + del full_int_array_64 + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_156 = paddle._C_ops.transpose(reshape_338, [0, 1, 3, 2, 4, 5]) + del reshape_338 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_339 = paddle._C_ops.reshape(transpose_156, full_int_array_36) + del transpose_156 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_340 = paddle._C_ops.reshape(reshape_339, full_int_array_37) + del reshape_339 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_67 = paddle._C_ops.unsqueeze(reshape_340, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_68 = paddle._C_ops.unsqueeze(reshape_340, full_int_array_2) + del reshape_340 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_10 = paddle._C_ops.subtract(unsqueeze_67, unsqueeze_68) + del unsqueeze_67, unsqueeze_68 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_10 = paddle._C_ops.not_equal(subtract_10, full_26) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_20 = paddle._C_ops.where(not_equal_10, full_35, subtract_10) + del full_35, not_equal_10, subtract_10 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_10 = paddle._C_ops.equal(where_20, full_26) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_21 = paddle._C_ops.where(equal_10, full_36, where_20) + del equal_10, full_36, where_20 + + # pd_op.matmul: (32x49x2304xf32) <- (32x49x768xf32, 768x2304xf32) + matmul_107 = paddle._C_ops.matmul(reshape_185, parameter_40, False, False) + del parameter_40 + + # pd_op.add: (32x49x2304xf32) <- (32x49x2304xf32, 2304xf32) + add_147 = paddle._C_ops.add(matmul_107, parameter_39) + del parameter_39 + + # pd_op.reshape: (32x49x3x24x32xf32) <- (32x49x2304xf32, 5xi64) + reshape_341 = paddle._C_ops.reshape(add_147, full_int_array_59) + del full_int_array_59 + + # pd_op.transpose: (3x32x24x49x32xf32) <- (32x49x3x24x32xf32) + transpose_128 = paddle._C_ops.transpose(reshape_341, [2, 0, 3, 1, 4]) + del reshape_341 + + # pd_op.slice: (32x24x49x32xf32) <- (3x32x24x49x32xf32, 1xi64, 1xi64) + slice_66 = paddle._C_ops.slice( + transpose_128, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (32x24x49x32xf32) <- (3x32x24x49x32xf32, 1xi64, 1xi64) + slice_67 = paddle._C_ops.slice( + transpose_128, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (32x24x49x32xf32) <- (3x32x24x49x32xf32, 1xi64, 1xi64) + slice_21 = paddle._C_ops.slice( + transpose_128, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (32x24x49x32xf32) <- (32x24x49x32xf32, 1xf32) + scale_21 = paddle._C_ops.scale(slice_66, full_0, float("0"), True) + del slice_66 + + # pd_op.transpose: (32x24x32x49xf32) <- (32x24x49x32xf32) + transpose_129 = paddle._C_ops.transpose(slice_67, [0, 1, 3, 2]) + del slice_67 + + # pd_op.matmul: (32x24x49x49xf32) <- (32x24x49x32xf32, 32x24x32x49xf32) + matmul_108 = paddle._C_ops.matmul(scale_21, transpose_129, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_186 = paddle._C_ops.reshape(data_15, full_int_array_15) + del data_15 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_21 = paddle._C_ops.index_select(data_16, reshape_186, 0) + del data_16 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_342 = paddle._C_ops.reshape(index_select_21, full_int_array_16) + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_130 = paddle._C_ops.transpose(reshape_342, [2, 0, 1]) + del reshape_342 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_31 = paddle._C_ops.unsqueeze(transpose_130, full_int_array_0) + + # pd_op.add: (32x24x49x49xf32) <- (32x24x49x49xf32, 1x24x49x49xf32) + add_148 = paddle._C_ops.add(matmul_108, unsqueeze_31) + + # pd_op.reshape: (8x4x24x49x49xf32) <- (32x24x49x49xf32, 5xi64) + reshape_187 = paddle._C_ops.reshape(add_148, full_int_array_65) + del full_int_array_65 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_69 = paddle._C_ops.unsqueeze(where_21, full_int_array_1) + del where_21 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_32 = paddle._C_ops.unsqueeze(unsqueeze_69, full_int_array_0) + del unsqueeze_69 + + # pd_op.add: (8x4x24x49x49xf32) <- (8x4x24x49x49xf32, 1x4x1x49x49xf32) + add_149 = paddle._C_ops.add(reshape_187, unsqueeze_32) + + # pd_op.reshape: (32x24x49x49xf32) <- (8x4x24x49x49xf32, 4xi64) + reshape_343 = paddle._C_ops.reshape(add_149, full_int_array_66) + del full_int_array_66 + + # pd_op.softmax: (32x24x49x49xf32) <- (32x24x49x49xf32) + softmax_21 = paddle._C_ops.softmax(reshape_343, -1) + del reshape_343 + + # pd_op.matmul: (32x24x49x32xf32) <- (32x24x49x49xf32, 32x24x49x32xf32) + matmul_145 = paddle._C_ops.matmul(softmax_21, slice_21, False, False) + + # pd_op.transpose: (32x49x24x32xf32) <- (32x24x49x32xf32) + transpose_131 = paddle._C_ops.transpose(matmul_145, [0, 2, 1, 3]) + del matmul_145 + + # pd_op.reshape: (32x49x768xf32) <- (32x49x24x32xf32, 3xi64) + reshape_188 = paddle._C_ops.reshape(transpose_131, full_int_array_60) + del full_int_array_60 + + # pd_op.matmul: (32x49x768xf32) <- (32x49x768xf32, 768x768xf32) + matmul_109 = paddle._C_ops.matmul(reshape_188, parameter_38, False, False) + del parameter_38 + + # pd_op.add: (32x49x768xf32) <- (32x49x768xf32, 768xf32) + add_150 = paddle._C_ops.add(matmul_109, parameter_37) + del parameter_37 + + # pd_op.reshape: (32x7x7x768xf32) <- (32x49x768xf32, 4xi64) + reshape_189 = paddle._C_ops.reshape(add_150, full_int_array_57) + del full_int_array_57 + + # pd_op.reshape: (8x2x2x7x7x768xf32) <- (32x7x7x768xf32, 6xi64) + reshape_344 = paddle._C_ops.reshape(reshape_189, full_int_array_61) + del full_int_array_61 + + # pd_op.transpose: (8x2x7x2x7x768xf32) <- (8x2x2x7x7x768xf32) + transpose_132 = paddle._C_ops.transpose(reshape_344, [0, 1, 3, 2, 4, 5]) + del reshape_344 + + # pd_op.reshape: (8x14x14x768xf32) <- (8x2x7x2x7x768xf32, 4xi64) + reshape_190 = paddle._C_ops.reshape(transpose_132, full_int_array_62) + del full_int_array_62 + + # pd_op.roll: (8x14x14x768xf32) <- (8x14x14x768xf32, 2xi64) + roll_21 = paddle._C_ops.roll(reshape_190, full_int_array_5, [1, 2]) + + # pd_op.reshape: (8x196x768xf32) <- (8x14x14x768xf32, 3xi64) + reshape_191 = paddle._C_ops.reshape(roll_21, full_int_array_63) + del full_int_array_63 + + # pd_op.full: (xf32) <- () + full_22 = paddle._C_ops.full( + [], + float("0.908696"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_233 = full_22 + + # pd_op.uniform: (8x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_40 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (8x1x1xf32) <- (xf32, 8x1x1xf32) + add_221 = paddle._C_ops.add(full_22, uniform_40) + del uniform_40 + + # pd_op.floor: (8x1x1xf32) <- (8x1x1xf32) + floor_40 = paddle._C_ops.floor(add_221) + del add_221 + + # pd_op.divide: (8x196x768xf32) <- (8x196x768xf32, xf32) + divide_40 = paddle._C_ops.divide(reshape_191, full_22) + + # pd_op.multiply: (8x196x768xf32) <- (8x196x768xf32, 8x1x1xf32) + multiply_40 = paddle._C_ops.multiply(divide_40, floor_40) + + # pd_op.add: (8x196x768xf32) <- (8x196x768xf32, 8x196x768xf32) + add_151 = paddle._C_ops.add(add_146, multiply_40) + + # pd_op.layer_norm: (8x196x768xf32, 8x196xf32, 8x196xf32) <- (8x196x768xf32, 768xf32, 768xf32) + layer_norm_138, layer_norm_139, layer_norm_140 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_151, parameter_36, parameter_35, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_35, parameter_36 + + # pd_op.matmul: (8x196x3072xf32) <- (8x196x768xf32, 768x3072xf32) + matmul_110 = paddle._C_ops.matmul(layer_norm_138, parameter_34, False, False) + del parameter_34 + + # pd_op.add: (8x196x3072xf32) <- (8x196x3072xf32, 3072xf32) + add_152 = paddle._C_ops.add(matmul_110, parameter_33) + del parameter_33 + + # pd_op.gelu: (8x196x3072xf32) <- (8x196x3072xf32) + gelu_21 = paddle._C_ops.gelu(add_152, False) + + # pd_op.matmul: (8x196x768xf32) <- (8x196x3072xf32, 3072x768xf32) + matmul_111 = paddle._C_ops.matmul(gelu_21, parameter_32, False, False) + del parameter_32 + + # pd_op.add: (8x196x768xf32) <- (8x196x768xf32, 768xf32) + add_153 = paddle._C_ops.add(matmul_111, parameter_31) + del parameter_31 + + # pd_op.uniform: (8x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_41 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (8x1x1xf32) <- (xf32, 8x1x1xf32) + add_222 = paddle._C_ops.add(full_22, uniform_41) + del uniform_41 + + # pd_op.floor: (8x1x1xf32) <- (8x1x1xf32) + floor_41 = paddle._C_ops.floor(add_222) + del add_222 + + # pd_op.divide: (8x196x768xf32) <- (8x196x768xf32, xf32) + divide_41 = paddle._C_ops.divide(add_153, full_22) + + # pd_op.multiply: (8x196x768xf32) <- (8x196x768xf32, 8x1x1xf32) + multiply_41 = paddle._C_ops.multiply(divide_41, floor_41) + + # pd_op.add: (8x196x768xf32) <- (8x196x768xf32, 8x196x768xf32) + add_154 = paddle._C_ops.add(add_151, multiply_41) + + # pd_op.reshape: (8x14x14x768xf32) <- (8x196x768xf32, 4xi64) + reshape_192 = paddle._C_ops.reshape(add_154, full_int_array_55) + + # pd_op.strided_slice: (8x7x7x768xf32) <- (8x14x14x768xf32, 2xi64, 2xi64, 2xi64) + strided_slice_8 = paddle._C_ops.strided_slice( + reshape_192, [1, 2], full_int_array_21, full_int_array_34, full_int_array_6 + ) + + # pd_op.strided_slice: (8x7x7x768xf32) <- (8x14x14x768xf32, 2xi64, 2xi64, 2xi64) + strided_slice_9 = paddle._C_ops.strided_slice( + reshape_192, [1, 2], full_int_array_7, full_int_array_34, full_int_array_6 + ) + + # pd_op.strided_slice: (8x7x7x768xf32) <- (8x14x14x768xf32, 2xi64, 2xi64, 2xi64) + strided_slice_10 = paddle._C_ops.strided_slice( + reshape_192, [1, 2], full_int_array_8, full_int_array_34, full_int_array_6 + ) + + # pd_op.strided_slice: (8x7x7x768xf32) <- (8x14x14x768xf32, 2xi64, 2xi64, 2xi64) + strided_slice_11 = paddle._C_ops.strided_slice( + reshape_192, [1, 2], full_int_array_23, full_int_array_34, full_int_array_6 + ) + + # pd_op.reshape: (8x14x14x768xf32) <- (8x14x14x768xf32, 4xi64) + reshape_345 = paddle._C_ops.reshape(reshape_192, full_int_array_55) + del full_int_array_55 + + # builtin.combine: ([8x7x7x768xf32, 8x7x7x768xf32, 8x7x7x768xf32, 8x7x7x768xf32]) <- (8x7x7x768xf32, 8x7x7x768xf32, 8x7x7x768xf32, 8x7x7x768xf32) + combine_2 = [ + strided_slice_8, + strided_slice_9, + strided_slice_10, + strided_slice_11, + ] + + # pd_op.concat: (8x7x7x3072xf32) <- ([8x7x7x768xf32, 8x7x7x768xf32, 8x7x7x768xf32, 8x7x7x768xf32], 1xi32) + concat_2 = paddle._C_ops.concat(combine_2, full_2) + del combine_2 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_67 = [8, -1, 3072] + + # pd_op.reshape: (8x49x3072xf32) <- (8x7x7x3072xf32, 3xi64) + reshape_193 = paddle._C_ops.reshape(concat_2, full_int_array_67) + del full_int_array_67 + + # pd_op.layer_norm: (8x49x3072xf32, 8x49xf32, 8x49xf32) <- (8x49x3072xf32, 3072xf32, 3072xf32) + layer_norm_141, layer_norm_142, layer_norm_143 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + reshape_193, parameter_30, parameter_29, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_29, parameter_30 + + # pd_op.matmul: (8x49x1536xf32) <- (8x49x3072xf32, 3072x1536xf32) + matmul_112 = paddle._C_ops.matmul(layer_norm_141, parameter_28, False, False) + del parameter_28 + + # pd_op.layer_norm: (8x49x1536xf32, 8x49xf32, 8x49xf32) <- (8x49x1536xf32, 1536xf32, 1536xf32) + layer_norm_144, layer_norm_145, layer_norm_146 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + matmul_112, parameter_27, parameter_26, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_26, parameter_27 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_68 = [8, 7, 7, 1536] + + # pd_op.reshape: (8x7x7x1536xf32) <- (8x49x1536xf32, 4xi64) + reshape_194 = paddle._C_ops.reshape(layer_norm_144, full_int_array_68) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_69 = [8, 1, 7, 1, 7, 1536] + + # pd_op.reshape: (8x1x7x1x7x1536xf32) <- (8x7x7x1536xf32, 6xi64) + reshape_346 = paddle._C_ops.reshape(reshape_194, full_int_array_69) + + # pd_op.transpose: (8x1x1x7x7x1536xf32) <- (8x1x7x1x7x1536xf32) + transpose_133 = paddle._C_ops.transpose(reshape_346, [0, 1, 3, 2, 4, 5]) + del reshape_346 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_70 = [-1, 7, 7, 1536] + + # pd_op.reshape: (8x7x7x1536xf32) <- (8x1x1x7x7x1536xf32, 4xi64) + reshape_195 = paddle._C_ops.reshape(transpose_133, full_int_array_70) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_71 = [-1, 49, 1536] + + # pd_op.reshape: (8x49x1536xf32) <- (8x7x7x1536xf32, 3xi64) + reshape_196 = paddle._C_ops.reshape(reshape_195, full_int_array_71) + + # pd_op.matmul: (8x49x4608xf32) <- (8x49x1536xf32, 1536x4608xf32) + matmul_113 = paddle._C_ops.matmul(reshape_196, parameter_25, False, False) + del parameter_25 + + # pd_op.add: (8x49x4608xf32) <- (8x49x4608xf32, 4608xf32) + add_155 = paddle._C_ops.add(matmul_113, parameter_24) + del parameter_24 + + # pd_op.full_int_array: (5xi64) <- () + full_int_array_72 = [8, 49, 3, 48, 32] + + # pd_op.reshape: (8x49x3x48x32xf32) <- (8x49x4608xf32, 5xi64) + reshape_347 = paddle._C_ops.reshape(add_155, full_int_array_72) + + # pd_op.transpose: (3x8x48x49x32xf32) <- (8x49x3x48x32xf32) + transpose_134 = paddle._C_ops.transpose(reshape_347, [2, 0, 3, 1, 4]) + del reshape_347 + + # pd_op.slice: (8x48x49x32xf32) <- (3x8x48x49x32xf32, 1xi64, 1xi64) + slice_68 = paddle._C_ops.slice( + transpose_134, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (8x48x49x32xf32) <- (3x8x48x49x32xf32, 1xi64, 1xi64) + slice_69 = paddle._C_ops.slice( + transpose_134, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (8x48x49x32xf32) <- (3x8x48x49x32xf32, 1xi64, 1xi64) + slice_22 = paddle._C_ops.slice( + transpose_134, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (8x48x49x32xf32) <- (8x48x49x32xf32, 1xf32) + scale_22 = paddle._C_ops.scale(slice_68, full_0, float("0"), True) + del slice_68 + + # pd_op.transpose: (8x48x32x49xf32) <- (8x48x49x32xf32) + transpose_135 = paddle._C_ops.transpose(slice_69, [0, 1, 3, 2]) + del slice_69 + + # pd_op.matmul: (8x48x49x49xf32) <- (8x48x49x32xf32, 8x48x32x49xf32) + matmul_114 = paddle._C_ops.matmul(scale_22, transpose_135, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_197 = paddle._C_ops.reshape(data_18, full_int_array_15) + del data_18 + + # pd_op.index_select: (2401x48xf32) <- (169x48xf32, 2401xi64) + index_select_22 = paddle._C_ops.index_select(data_19, reshape_197, 0) + del data_19 + + # pd_op.reshape: (49x49x48xf32) <- (2401x48xf32, 3xi64) + reshape_348 = paddle._C_ops.reshape(index_select_22, full_int_array_16) + + # pd_op.transpose: (48x49x49xf32) <- (49x49x48xf32) + transpose_136 = paddle._C_ops.transpose(reshape_348, [2, 0, 1]) + del reshape_348 + + # pd_op.unsqueeze: (1x48x49x49xf32) <- (48x49x49xf32, 1xi64) + unsqueeze_33 = paddle._C_ops.unsqueeze(transpose_136, full_int_array_0) + + # pd_op.add: (8x48x49x49xf32) <- (8x48x49x49xf32, 1x48x49x49xf32) + add_223 = paddle._C_ops.add(matmul_114, unsqueeze_33) + + # pd_op.softmax: (8x48x49x49xf32) <- (8x48x49x49xf32) + softmax_22 = paddle._C_ops.softmax(add_223, -1) + del add_223 + + # pd_op.matmul: (8x48x49x32xf32) <- (8x48x49x49xf32, 8x48x49x32xf32) + matmul_146 = paddle._C_ops.matmul(softmax_22, slice_22, False, False) + + # pd_op.transpose: (8x49x48x32xf32) <- (8x48x49x32xf32) + transpose_137 = paddle._C_ops.transpose(matmul_146, [0, 2, 1, 3]) + del matmul_146 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_73 = [8, 49, 1536] + + # pd_op.reshape: (8x49x1536xf32) <- (8x49x48x32xf32, 3xi64) + reshape_198 = paddle._C_ops.reshape(transpose_137, full_int_array_73) + + # pd_op.matmul: (8x49x1536xf32) <- (8x49x1536xf32, 1536x1536xf32) + matmul_115 = paddle._C_ops.matmul(reshape_198, parameter_23, False, False) + del parameter_23 + + # pd_op.add: (8x49x1536xf32) <- (8x49x1536xf32, 1536xf32) + add_156 = paddle._C_ops.add(matmul_115, parameter_22) + del parameter_22 + + # pd_op.reshape: (8x7x7x1536xf32) <- (8x49x1536xf32, 4xi64) + reshape_199 = paddle._C_ops.reshape(add_156, full_int_array_70) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_74 = [-1, 1, 1, 7, 7, 1536] + + # pd_op.reshape: (8x1x1x7x7x1536xf32) <- (8x7x7x1536xf32, 6xi64) + reshape_349 = paddle._C_ops.reshape(reshape_199, full_int_array_74) + + # pd_op.transpose: (8x1x7x1x7x1536xf32) <- (8x1x1x7x7x1536xf32) + transpose_138 = paddle._C_ops.transpose(reshape_349, [0, 1, 3, 2, 4, 5]) + del reshape_349 + + # pd_op.reshape: (8x7x7x1536xf32) <- (8x1x7x1x7x1536xf32, 4xi64) + reshape_200 = paddle._C_ops.reshape(transpose_138, full_int_array_70) + + # pd_op.reshape: (8x49x1536xf32) <- (8x7x7x1536xf32, 3xi64) + reshape_201 = paddle._C_ops.reshape(reshape_200, full_int_array_73) + + # pd_op.full: (xf32) <- () + full_23 = paddle._C_ops.full( + [], + float("0.904348"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_255 = full_23 + + # pd_op.uniform: (8x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_42 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (8x1x1xf32) <- (xf32, 8x1x1xf32) + add_224 = paddle._C_ops.add(full_23, uniform_42) + del uniform_42 + + # pd_op.floor: (8x1x1xf32) <- (8x1x1xf32) + floor_42 = paddle._C_ops.floor(add_224) + del add_224 + + # pd_op.divide: (8x49x1536xf32) <- (8x49x1536xf32, xf32) + divide_42 = paddle._C_ops.divide(reshape_201, full_23) + + # pd_op.multiply: (8x49x1536xf32) <- (8x49x1536xf32, 8x1x1xf32) + multiply_42 = paddle._C_ops.multiply(divide_42, floor_42) + + # pd_op.add: (8x49x1536xf32) <- (8x49x1536xf32, 8x49x1536xf32) + add_157 = paddle._C_ops.add(matmul_112, multiply_42) + + # pd_op.layer_norm: (8x49x1536xf32, 8x49xf32, 8x49xf32) <- (8x49x1536xf32, 1536xf32, 1536xf32) + layer_norm_147, layer_norm_148, layer_norm_149 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_157, parameter_21, parameter_20, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_20, parameter_21 + + # pd_op.matmul: (8x49x6144xf32) <- (8x49x1536xf32, 1536x6144xf32) + matmul_116 = paddle._C_ops.matmul(layer_norm_147, parameter_19, False, False) + del parameter_19 + + # pd_op.add: (8x49x6144xf32) <- (8x49x6144xf32, 6144xf32) + add_158 = paddle._C_ops.add(matmul_116, parameter_18) + del parameter_18 + + # pd_op.gelu: (8x49x6144xf32) <- (8x49x6144xf32) + gelu_22 = paddle._C_ops.gelu(add_158, False) + + # pd_op.matmul: (8x49x1536xf32) <- (8x49x6144xf32, 6144x1536xf32) + matmul_117 = paddle._C_ops.matmul(gelu_22, parameter_17, False, False) + del parameter_17 + + # pd_op.add: (8x49x1536xf32) <- (8x49x1536xf32, 1536xf32) + add_159 = paddle._C_ops.add(matmul_117, parameter_16) + del parameter_16 + + # pd_op.uniform: (8x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_43 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (8x1x1xf32) <- (xf32, 8x1x1xf32) + add_225 = paddle._C_ops.add(full_23, uniform_43) + del uniform_43 + + # pd_op.floor: (8x1x1xf32) <- (8x1x1xf32) + floor_43 = paddle._C_ops.floor(add_225) + del add_225 + + # pd_op.divide: (8x49x1536xf32) <- (8x49x1536xf32, xf32) + divide_43 = paddle._C_ops.divide(add_159, full_23) + + # pd_op.multiply: (8x49x1536xf32) <- (8x49x1536xf32, 8x1x1xf32) + multiply_43 = paddle._C_ops.multiply(divide_43, floor_43) + + # pd_op.add: (8x49x1536xf32) <- (8x49x1536xf32, 8x49x1536xf32) + add_160 = paddle._C_ops.add(add_157, multiply_43) + + # pd_op.layer_norm: (8x49x1536xf32, 8x49xf32, 8x49xf32) <- (8x49x1536xf32, 1536xf32, 1536xf32) + layer_norm_150, layer_norm_151, layer_norm_152 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_160, parameter_15, parameter_14, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_14, parameter_15 + + # pd_op.reshape: (8x7x7x1536xf32) <- (8x49x1536xf32, 4xi64) + reshape_202 = paddle._C_ops.reshape(layer_norm_150, full_int_array_68) + del full_int_array_68 + + # pd_op.roll: (8x7x7x1536xf32) <- (8x7x7x1536xf32, 2xi64) + roll_22 = paddle._C_ops.roll(reshape_202, full_int_array_4, [1, 2]) + + # pd_op.reshape: (8x1x7x1x7x1536xf32) <- (8x7x7x1536xf32, 6xi64) + reshape_350 = paddle._C_ops.reshape(roll_22, full_int_array_69) + del full_int_array_69 + + # pd_op.transpose: (8x1x1x7x7x1536xf32) <- (8x1x7x1x7x1536xf32) + transpose_139 = paddle._C_ops.transpose(reshape_350, [0, 1, 3, 2, 4, 5]) + del reshape_350 + + # pd_op.reshape: (8x7x7x1536xf32) <- (8x1x1x7x7x1536xf32, 4xi64) + reshape_203 = paddle._C_ops.reshape(transpose_139, full_int_array_70) + + # pd_op.reshape: (8x49x1536xf32) <- (8x7x7x1536xf32, 3xi64) + reshape_204 = paddle._C_ops.reshape(reshape_203, full_int_array_71) + del full_int_array_71 + + # pd_op.full: (1x7x7x1xf32) <- () + full_45 = paddle._C_ops.full( + [1, 7, 7, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__100 = paddle._C_ops.set_value_( + full_45, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_45, full_int_array_21 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__101 = paddle._C_ops.set_value_( + set_value__100, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del full_int_array_24, set_value__100 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__102 = paddle._C_ops.set_value_( + set_value__101, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del full_int_array_26, full_int_array_27, set_value__101 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__103 = paddle._C_ops.set_value_( + set_value__102, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del full_int_array_28, set_value__102 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__104 = paddle._C_ops.set_value_( + set_value__103, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del full_int_array_22, set_value__103 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__105 = paddle._C_ops.set_value_( + set_value__104, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del full_int_array_25, full_int_array_30, set_value__104 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__106 = paddle._C_ops.set_value_( + set_value__105, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del full_int_array_31, full_int_array_32, set_value__105 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__107 = paddle._C_ops.set_value_( + set_value__106, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del full_int_array_29, full_int_array_33, set_value__106 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__11 = paddle._C_ops.set_value_( + set_value__107, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del full_int_array_34, set_value__107 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_75 = [1, 1, 7, 1, 7, 1] + + # pd_op.reshape: (1x1x7x1x7x1xf32) <- (1x7x7x1xf32, 6xi64) + reshape_351 = paddle._C_ops.reshape(set_value__11, full_int_array_75) + del full_int_array_75 + + # pd_op.transpose: (1x1x1x7x7x1xf32) <- (1x1x7x1x7x1xf32) + transpose_157 = paddle._C_ops.transpose(reshape_351, [0, 1, 3, 2, 4, 5]) + del reshape_351 + + # pd_op.reshape: (1x7x7x1xf32) <- (1x1x1x7x7x1xf32, 4xi64) + reshape_352 = paddle._C_ops.reshape(transpose_157, full_int_array_36) + del full_int_array_36, transpose_157 + + # pd_op.reshape: (1x49xf32) <- (1x7x7x1xf32, 2xi64) + reshape_353 = paddle._C_ops.reshape(reshape_352, full_int_array_37) + del full_int_array_37, reshape_352 + + # pd_op.unsqueeze: (1x1x49xf32) <- (1x49xf32, 1xi64) + unsqueeze_70 = paddle._C_ops.unsqueeze(reshape_353, full_int_array_1) + + # pd_op.unsqueeze: (1x49x1xf32) <- (1x49xf32, 1xi64) + unsqueeze_71 = paddle._C_ops.unsqueeze(reshape_353, full_int_array_2) + del reshape_353 + + # pd_op.subtract: (1x49x49xf32) <- (1x1x49xf32, 1x49x1xf32) + subtract_11 = paddle._C_ops.subtract(unsqueeze_70, unsqueeze_71) + del unsqueeze_70, unsqueeze_71 + + # pd_op.not_equal: (1x49x49xb) <- (1x49x49xf32, xf32) + not_equal_11 = paddle._C_ops.not_equal(subtract_11, full_26) + + # pd_op.full: (1x49x49xf32) <- () + full_46 = paddle._C_ops.full( + [1, 49, 49], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (1x49x49xf32) <- (1x49x49xb, 1x49x49xf32, 1x49x49xf32) + where_22 = paddle._C_ops.where(not_equal_11, full_46, subtract_11) + del full_46, not_equal_11, subtract_11 + + # pd_op.equal: (1x49x49xb) <- (1x49x49xf32, xf32) + equal_11 = paddle._C_ops.equal(where_22, full_26) + del full_26 + + # pd_op.full: (1x49x49xf32) <- () + full_47 = paddle._C_ops.full( + [1, 49, 49], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (1x49x49xf32) <- (1x49x49xb, 1x49x49xf32, 1x49x49xf32) + where_23 = paddle._C_ops.where(equal_11, full_47, where_22) + del equal_11, full_47, where_22 + + # pd_op.matmul: (8x49x4608xf32) <- (8x49x1536xf32, 1536x4608xf32) + matmul_118 = paddle._C_ops.matmul(reshape_204, parameter_13, False, False) + del parameter_13 + + # pd_op.add: (8x49x4608xf32) <- (8x49x4608xf32, 4608xf32) + add_161 = paddle._C_ops.add(matmul_118, parameter_12) + del parameter_12 + + # pd_op.reshape: (8x49x3x48x32xf32) <- (8x49x4608xf32, 5xi64) + reshape_354 = paddle._C_ops.reshape(add_161, full_int_array_72) + del full_int_array_72 + + # pd_op.transpose: (3x8x48x49x32xf32) <- (8x49x3x48x32xf32) + transpose_140 = paddle._C_ops.transpose(reshape_354, [2, 0, 3, 1, 4]) + del reshape_354 + + # pd_op.slice: (8x48x49x32xf32) <- (3x8x48x49x32xf32, 1xi64, 1xi64) + slice_70 = paddle._C_ops.slice( + transpose_140, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (8x48x49x32xf32) <- (3x8x48x49x32xf32, 1xi64, 1xi64) + slice_71 = paddle._C_ops.slice( + transpose_140, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (8x48x49x32xf32) <- (3x8x48x49x32xf32, 1xi64, 1xi64) + slice_23 = paddle._C_ops.slice( + transpose_140, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (8x48x49x32xf32) <- (8x48x49x32xf32, 1xf32) + scale_23 = paddle._C_ops.scale(slice_70, full_0, float("0"), True) + del slice_70 + + # pd_op.transpose: (8x48x32x49xf32) <- (8x48x49x32xf32) + transpose_141 = paddle._C_ops.transpose(slice_71, [0, 1, 3, 2]) + del slice_71 + + # pd_op.matmul: (8x48x49x49xf32) <- (8x48x49x32xf32, 8x48x32x49xf32) + matmul_119 = paddle._C_ops.matmul(scale_23, transpose_141, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_205 = paddle._C_ops.reshape(data_21, full_int_array_15) + del data_21, full_int_array_15 + + # pd_op.index_select: (2401x48xf32) <- (169x48xf32, 2401xi64) + index_select_23 = paddle._C_ops.index_select(data_22, reshape_205, 0) + del data_22 + + # pd_op.reshape: (49x49x48xf32) <- (2401x48xf32, 3xi64) + reshape_355 = paddle._C_ops.reshape(index_select_23, full_int_array_16) + del full_int_array_16 + + # pd_op.transpose: (48x49x49xf32) <- (49x49x48xf32) + transpose_142 = paddle._C_ops.transpose(reshape_355, [2, 0, 1]) + del reshape_355 + + # pd_op.unsqueeze: (1x48x49x49xf32) <- (48x49x49xf32, 1xi64) + unsqueeze_34 = paddle._C_ops.unsqueeze(transpose_142, full_int_array_0) + + # pd_op.add: (8x48x49x49xf32) <- (8x48x49x49xf32, 1x48x49x49xf32) + add_162 = paddle._C_ops.add(matmul_119, unsqueeze_34) + + # pd_op.full_int_array: (5xi64) <- () + full_int_array_76 = [8, 1, 48, 49, 49] + + # pd_op.reshape: (8x1x48x49x49xf32) <- (8x48x49x49xf32, 5xi64) + reshape_206 = paddle._C_ops.reshape(add_162, full_int_array_76) + del full_int_array_76 + + # pd_op.unsqueeze: (1x1x49x49xf32) <- (1x49x49xf32, 1xi64) + unsqueeze_72 = paddle._C_ops.unsqueeze(where_23, full_int_array_1) + del where_23 + + # pd_op.unsqueeze: (1x1x1x49x49xf32) <- (1x1x49x49xf32, 1xi64) + unsqueeze_35 = paddle._C_ops.unsqueeze(unsqueeze_72, full_int_array_0) + del unsqueeze_72 + + # pd_op.add: (8x1x48x49x49xf32) <- (8x1x48x49x49xf32, 1x1x1x49x49xf32) + add_163 = paddle._C_ops.add(reshape_206, unsqueeze_35) + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_77 = [8, 48, 49, 49] + + # pd_op.reshape: (8x48x49x49xf32) <- (8x1x48x49x49xf32, 4xi64) + reshape_356 = paddle._C_ops.reshape(add_163, full_int_array_77) + del full_int_array_77 + + # pd_op.softmax: (8x48x49x49xf32) <- (8x48x49x49xf32) + softmax_23 = paddle._C_ops.softmax(reshape_356, -1) + del reshape_356 + + # pd_op.matmul: (8x48x49x32xf32) <- (8x48x49x49xf32, 8x48x49x32xf32) + matmul_147 = paddle._C_ops.matmul(softmax_23, slice_23, False, False) + + # pd_op.transpose: (8x49x48x32xf32) <- (8x48x49x32xf32) + transpose_143 = paddle._C_ops.transpose(matmul_147, [0, 2, 1, 3]) + del matmul_147 + + # pd_op.reshape: (8x49x1536xf32) <- (8x49x48x32xf32, 3xi64) + reshape_207 = paddle._C_ops.reshape(transpose_143, full_int_array_73) + + # pd_op.matmul: (8x49x1536xf32) <- (8x49x1536xf32, 1536x1536xf32) + matmul_120 = paddle._C_ops.matmul(reshape_207, parameter_11, False, False) + del parameter_11 + + # pd_op.add: (8x49x1536xf32) <- (8x49x1536xf32, 1536xf32) + add_164 = paddle._C_ops.add(matmul_120, parameter_10) + del parameter_10 + + # pd_op.reshape: (8x7x7x1536xf32) <- (8x49x1536xf32, 4xi64) + reshape_208 = paddle._C_ops.reshape(add_164, full_int_array_70) + + # pd_op.reshape: (8x1x1x7x7x1536xf32) <- (8x7x7x1536xf32, 6xi64) + reshape_357 = paddle._C_ops.reshape(reshape_208, full_int_array_74) + del full_int_array_74 + + # pd_op.transpose: (8x1x7x1x7x1536xf32) <- (8x1x1x7x7x1536xf32) + transpose_144 = paddle._C_ops.transpose(reshape_357, [0, 1, 3, 2, 4, 5]) + del reshape_357 + + # pd_op.reshape: (8x7x7x1536xf32) <- (8x1x7x1x7x1536xf32, 4xi64) + reshape_209 = paddle._C_ops.reshape(transpose_144, full_int_array_70) + del full_int_array_70 + + # pd_op.roll: (8x7x7x1536xf32) <- (8x7x7x1536xf32, 2xi64) + roll_23 = paddle._C_ops.roll(reshape_209, full_int_array_5, [1, 2]) + + # pd_op.reshape: (8x49x1536xf32) <- (8x7x7x1536xf32, 3xi64) + reshape_210 = paddle._C_ops.reshape(roll_23, full_int_array_73) + del full_int_array_73 + + # pd_op.full: (xf32) <- () + full_24 = paddle._C_ops.full( + [], float("0.9"), paddle.float32, paddle.framework._current_expected_place() + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_266 = full_24 + + # pd_op.uniform: (8x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_44 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (8x1x1xf32) <- (xf32, 8x1x1xf32) + add_226 = paddle._C_ops.add(full_24, uniform_44) + del uniform_44 + + # pd_op.floor: (8x1x1xf32) <- (8x1x1xf32) + floor_44 = paddle._C_ops.floor(add_226) + del add_226 + + # pd_op.divide: (8x49x1536xf32) <- (8x49x1536xf32, xf32) + divide_44 = paddle._C_ops.divide(reshape_210, full_24) + + # pd_op.multiply: (8x49x1536xf32) <- (8x49x1536xf32, 8x1x1xf32) + multiply_44 = paddle._C_ops.multiply(divide_44, floor_44) + + # pd_op.add: (8x49x1536xf32) <- (8x49x1536xf32, 8x49x1536xf32) + add_165 = paddle._C_ops.add(add_160, multiply_44) + + # pd_op.layer_norm: (8x49x1536xf32, 8x49xf32, 8x49xf32) <- (8x49x1536xf32, 1536xf32, 1536xf32) + layer_norm_153, layer_norm_154, layer_norm_155 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_165, parameter_9, parameter_8, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_8, parameter_9 + + # pd_op.matmul: (8x49x6144xf32) <- (8x49x1536xf32, 1536x6144xf32) + matmul_121 = paddle._C_ops.matmul(layer_norm_153, parameter_7, False, False) + del parameter_7 + + # pd_op.add: (8x49x6144xf32) <- (8x49x6144xf32, 6144xf32) + add_166 = paddle._C_ops.add(matmul_121, parameter_6) + del parameter_6 + + # pd_op.gelu: (8x49x6144xf32) <- (8x49x6144xf32) + gelu_23 = paddle._C_ops.gelu(add_166, False) + + # pd_op.matmul: (8x49x1536xf32) <- (8x49x6144xf32, 6144x1536xf32) + matmul_122 = paddle._C_ops.matmul(gelu_23, parameter_5, False, False) + del parameter_5 + + # pd_op.add: (8x49x1536xf32) <- (8x49x1536xf32, 1536xf32) + add_167 = paddle._C_ops.add(matmul_122, parameter_4) + del parameter_4 + + # pd_op.uniform: (8x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_45 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + del full_29, full_30, full_int_array_40 + + # pd_op.add: (8x1x1xf32) <- (xf32, 8x1x1xf32) + add_227 = paddle._C_ops.add(full_24, uniform_45) + del uniform_45 + + # pd_op.floor: (8x1x1xf32) <- (8x1x1xf32) + floor_45 = paddle._C_ops.floor(add_227) + del add_227 + + # pd_op.divide: (8x49x1536xf32) <- (8x49x1536xf32, xf32) + divide_45 = paddle._C_ops.divide(add_167, full_24) + + # pd_op.multiply: (8x49x1536xf32) <- (8x49x1536xf32, 8x1x1xf32) + multiply_45 = paddle._C_ops.multiply(divide_45, floor_45) + + # pd_op.add: (8x49x1536xf32) <- (8x49x1536xf32, 8x49x1536xf32) + add_168 = paddle._C_ops.add(add_165, multiply_45) + + # pd_op.layer_norm: (8x49x1536xf32, 8x49xf32, 8x49xf32) <- (8x49x1536xf32, 1536xf32, 1536xf32) + layer_norm_158, layer_norm_156, layer_norm_157 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_168, parameter_3, parameter_2, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_2, parameter_3 + + # pd_op.transpose: (8x1536x49xf32) <- (8x49x1536xf32) + transpose_145 = paddle._C_ops.transpose(layer_norm_158, [0, 2, 1]) + del layer_norm_158 + + # pd_op.unsqueeze: (8x1536x1x49xf32) <- (8x1536x49xf32, 1xi64) + unsqueeze_36 = paddle._C_ops.unsqueeze(transpose_145, full_int_array_2) + + # pd_op.pool2d: (8x1536x1x1xf32) <- (8x1536x1x49xf32, 2xi64) + pool2d_0 = paddle._C_ops.pool2d( + unsqueeze_36, + full_int_array_23, + [1, 1], + [0, 0], + False, + True, + "NCHW", + "avg", + False, + True, + "EXPLICIT", + ) + del full_int_array_23 + + # pd_op.squeeze: (8x1536x1xf32) <- (8x1536x1x1xf32, 1xi64) + squeeze_0 = paddle._C_ops.squeeze(pool2d_0, full_int_array_2) + + # pd_op.flatten: (8x1536xf32) <- (8x1536x1xf32) + flatten_0 = paddle._C_ops.flatten(squeeze_0, 1, 2) + + # pd_op.matmul: (8x102xf32) <- (8x1536xf32, 1536x102xf32) + matmul_123 = paddle._C_ops.matmul(flatten_0, parameter_1, False, False) + del parameter_1 + + # pd_op.add: (8x102xf32) <- (8x102xf32, 102xf32) + add_169 = paddle._C_ops.add(matmul_123, parameter_0) + del ( + assign_0, + assign_1, + assign_10, + assign_101, + assign_103, + assign_104, + assign_105, + assign_106, + assign_107, + assign_108, + assign_109, + assign_111, + assign_112, + assign_114, + assign_115, + assign_116, + assign_117, + assign_118, + assign_119, + assign_12, + assign_121, + assign_123, + assign_124, + assign_125, + assign_126, + assign_127, + assign_128, + assign_129, + assign_13, + assign_131, + assign_132, + assign_134, + assign_135, + assign_136, + assign_137, + assign_138, + assign_139, + assign_14, + assign_141, + assign_143, + assign_144, + assign_145, + assign_146, + assign_147, + assign_148, + assign_149, + assign_15, + assign_151, + assign_152, + assign_154, + assign_155, + assign_156, + assign_157, + assign_158, + assign_159, + assign_16, + assign_161, + assign_163, + assign_164, + assign_165, + assign_166, + assign_167, + assign_168, + assign_169, + assign_17, + assign_171, + assign_172, + assign_174, + assign_175, + assign_176, + assign_177, + assign_178, + assign_179, + assign_18, + assign_181, + assign_183, + assign_184, + assign_185, + assign_186, + assign_187, + assign_188, + assign_189, + assign_19, + assign_191, + assign_192, + assign_194, + assign_195, + assign_196, + assign_197, + assign_198, + assign_199, + assign_2, + assign_20, + assign_201, + assign_203, + assign_204, + assign_205, + assign_206, + assign_207, + assign_208, + assign_209, + assign_21, + assign_211, + assign_212, + assign_214, + assign_215, + assign_216, + assign_217, + assign_218, + assign_219, + assign_22, + assign_221, + assign_223, + assign_224, + assign_225, + assign_226, + assign_227, + assign_228, + assign_229, + assign_23, + assign_231, + assign_232, + assign_234, + assign_235, + assign_236, + assign_237, + assign_238, + assign_239, + assign_24, + assign_240, + assign_241, + assign_242, + assign_243, + assign_244, + assign_245, + assign_247, + assign_248, + assign_249, + assign_25, + assign_250, + assign_251, + assign_252, + assign_254, + assign_256, + assign_257, + assign_258, + assign_259, + assign_26, + assign_260, + assign_261, + assign_262, + assign_264, + assign_265, + assign_267, + assign_268, + assign_269, + assign_28, + assign_3, + assign_30, + assign_31, + assign_32, + assign_33, + assign_34, + assign_35, + assign_36, + assign_38, + assign_39, + assign_4, + assign_41, + assign_42, + assign_43, + assign_44, + assign_45, + assign_46, + assign_47, + assign_48, + assign_49, + assign_5, + assign_50, + assign_51, + assign_52, + assign_54, + assign_55, + assign_56, + assign_57, + assign_58, + assign_59, + assign_6, + assign_61, + assign_63, + assign_64, + assign_65, + assign_66, + assign_67, + assign_68, + assign_69, + assign_7, + assign_71, + assign_72, + assign_74, + assign_75, + assign_76, + assign_77, + assign_78, + assign_79, + assign_8, + assign_81, + assign_83, + assign_84, + assign_85, + assign_86, + assign_87, + assign_88, + assign_89, + assign_91, + assign_92, + assign_94, + assign_95, + assign_96, + assign_97, + assign_98, + assign_99, + full_int_array_0, + full_int_array_1, + full_int_array_2, + full_int_array_3, + full_int_array_4, + full_int_array_5, + full_int_array_6, + full_int_array_7, + full_int_array_8, + parameter_0, + ) + + return ( + conv2d_0, + reshape_0, + add_0, + transpose_0, + layer_norm_0, + layer_norm_1, + layer_norm_2, + layer_norm_3, + layer_norm_4, + layer_norm_5, + reshape_1, + transpose_1, + reshape_2, + reshape_3, + matmul_0, + add_1, + transpose_2, + slice_0, + full_0, + scale_0, + transpose_3, + matmul_1, + reshape_4, + index_select_0, + transpose_4, + unsqueeze_0, + softmax_0, + transpose_5, + reshape_5, + matmul_2, + add_2, + reshape_6, + transpose_6, + reshape_7, + reshape_8, + add_3, + layer_norm_6, + layer_norm_7, + layer_norm_8, + matmul_3, + add_4, + gelu_0, + matmul_4, + add_5, + add_6, + layer_norm_9, + layer_norm_10, + layer_norm_11, + reshape_9, + roll_0, + transpose_7, + reshape_10, + reshape_11, + matmul_5, + add_7, + transpose_8, + slice_1, + assign_9, + scale_1, + transpose_9, + matmul_6, + reshape_12, + index_select_1, + transpose_10, + unsqueeze_1, + add_8, + reshape_13, + unsqueeze_2, + add_9, + softmax_1, + transpose_11, + reshape_14, + matmul_7, + add_10, + reshape_15, + transpose_12, + reshape_16, + roll_1, + reshape_17, + full_1, + floor_0, + divide_0, + multiply_0, + add_11, + layer_norm_12, + layer_norm_13, + layer_norm_14, + matmul_8, + add_12, + gelu_1, + matmul_9, + add_13, + assign_11, + floor_1, + divide_1, + multiply_1, + add_14, + reshape_18, + strided_slice_0, + strided_slice_1, + strided_slice_2, + strided_slice_3, + full_2, + concat_0, + reshape_19, + layer_norm_15, + layer_norm_16, + layer_norm_17, + matmul_10, + layer_norm_18, + layer_norm_19, + layer_norm_20, + reshape_20, + transpose_13, + reshape_21, + reshape_22, + matmul_11, + add_15, + transpose_14, + slice_2, + assign_27, + scale_2, + transpose_15, + matmul_12, + reshape_23, + index_select_2, + transpose_16, + unsqueeze_3, + softmax_2, + transpose_17, + reshape_24, + matmul_13, + add_16, + reshape_25, + transpose_18, + reshape_26, + reshape_27, + full_3, + floor_2, + divide_2, + multiply_2, + add_17, + layer_norm_21, + layer_norm_22, + layer_norm_23, + matmul_14, + add_18, + gelu_2, + matmul_15, + add_19, + assign_29, + floor_3, + divide_3, + multiply_3, + add_20, + layer_norm_24, + layer_norm_25, + layer_norm_26, + reshape_28, + roll_2, + transpose_19, + reshape_29, + reshape_30, + matmul_16, + add_21, + transpose_20, + slice_3, + assign_37, + scale_3, + transpose_21, + matmul_17, + reshape_31, + index_select_3, + transpose_22, + unsqueeze_4, + add_22, + reshape_32, + unsqueeze_5, + add_23, + softmax_3, + transpose_23, + reshape_33, + matmul_18, + add_24, + reshape_34, + transpose_24, + reshape_35, + roll_3, + reshape_36, + full_4, + floor_4, + divide_4, + multiply_4, + add_25, + layer_norm_27, + layer_norm_28, + layer_norm_29, + matmul_19, + add_26, + gelu_3, + matmul_20, + add_27, + assign_40, + floor_5, + divide_5, + multiply_5, + add_28, + reshape_37, + strided_slice_4, + strided_slice_5, + strided_slice_6, + strided_slice_7, + assign_53, + concat_1, + reshape_38, + layer_norm_30, + layer_norm_31, + layer_norm_32, + matmul_21, + layer_norm_33, + layer_norm_34, + layer_norm_35, + reshape_39, + transpose_25, + reshape_40, + reshape_41, + matmul_22, + add_29, + transpose_26, + slice_4, + assign_60, + scale_4, + transpose_27, + matmul_23, + reshape_42, + index_select_4, + transpose_28, + unsqueeze_6, + softmax_4, + transpose_29, + reshape_43, + matmul_24, + add_30, + reshape_44, + transpose_30, + reshape_45, + reshape_46, + full_5, + floor_6, + divide_6, + multiply_6, + add_31, + layer_norm_36, + layer_norm_37, + layer_norm_38, + matmul_25, + add_32, + gelu_4, + matmul_26, + add_33, + assign_62, + floor_7, + divide_7, + multiply_7, + add_34, + layer_norm_39, + layer_norm_40, + layer_norm_41, + reshape_47, + roll_4, + transpose_31, + reshape_48, + reshape_49, + matmul_27, + add_35, + transpose_32, + slice_5, + assign_70, + scale_5, + transpose_33, + matmul_28, + reshape_50, + index_select_5, + transpose_34, + unsqueeze_7, + add_36, + reshape_51, + unsqueeze_8, + add_37, + softmax_5, + transpose_35, + reshape_52, + matmul_29, + add_38, + reshape_53, + transpose_36, + reshape_54, + roll_5, + reshape_55, + full_6, + floor_8, + divide_8, + multiply_8, + add_39, + layer_norm_42, + layer_norm_43, + layer_norm_44, + matmul_30, + add_40, + gelu_5, + matmul_31, + add_41, + assign_73, + floor_9, + divide_9, + multiply_9, + add_42, + layer_norm_45, + layer_norm_46, + layer_norm_47, + reshape_56, + transpose_37, + reshape_57, + reshape_58, + matmul_32, + add_43, + transpose_38, + slice_6, + assign_80, + scale_6, + transpose_39, + matmul_33, + reshape_59, + index_select_6, + transpose_40, + unsqueeze_9, + softmax_6, + transpose_41, + reshape_60, + matmul_34, + add_44, + reshape_61, + transpose_42, + reshape_62, + reshape_63, + full_7, + floor_10, + divide_10, + multiply_10, + add_45, + layer_norm_48, + layer_norm_49, + layer_norm_50, + matmul_35, + add_46, + gelu_6, + matmul_36, + add_47, + assign_82, + floor_11, + divide_11, + multiply_11, + add_48, + layer_norm_51, + layer_norm_52, + layer_norm_53, + reshape_64, + roll_6, + transpose_43, + reshape_65, + reshape_66, + matmul_37, + add_49, + transpose_44, + slice_7, + assign_90, + scale_7, + transpose_45, + matmul_38, + reshape_67, + index_select_7, + transpose_46, + unsqueeze_10, + add_50, + reshape_68, + unsqueeze_11, + add_51, + softmax_7, + transpose_47, + reshape_69, + matmul_39, + add_52, + reshape_70, + transpose_48, + reshape_71, + roll_7, + reshape_72, + full_8, + floor_12, + divide_12, + multiply_12, + add_53, + layer_norm_54, + layer_norm_55, + layer_norm_56, + matmul_40, + add_54, + gelu_7, + matmul_41, + add_55, + assign_93, + floor_13, + divide_13, + multiply_13, + add_56, + layer_norm_57, + layer_norm_58, + layer_norm_59, + reshape_73, + transpose_49, + reshape_74, + reshape_75, + matmul_42, + add_57, + transpose_50, + slice_8, + assign_100, + scale_8, + transpose_51, + matmul_43, + reshape_76, + index_select_8, + transpose_52, + unsqueeze_12, + softmax_8, + transpose_53, + reshape_77, + matmul_44, + add_58, + reshape_78, + transpose_54, + reshape_79, + reshape_80, + full_9, + floor_14, + divide_14, + multiply_14, + add_59, + layer_norm_60, + layer_norm_61, + layer_norm_62, + matmul_45, + add_60, + gelu_8, + matmul_46, + add_61, + assign_102, + floor_15, + divide_15, + multiply_15, + add_62, + layer_norm_63, + layer_norm_64, + layer_norm_65, + reshape_81, + roll_8, + transpose_55, + reshape_82, + reshape_83, + matmul_47, + add_63, + transpose_56, + slice_9, + assign_110, + scale_9, + transpose_57, + matmul_48, + reshape_84, + index_select_9, + transpose_58, + unsqueeze_13, + add_64, + reshape_85, + unsqueeze_14, + add_65, + softmax_9, + transpose_59, + reshape_86, + matmul_49, + add_66, + reshape_87, + transpose_60, + reshape_88, + roll_9, + reshape_89, + full_10, + floor_16, + divide_16, + multiply_16, + add_67, + layer_norm_66, + layer_norm_67, + layer_norm_68, + matmul_50, + add_68, + gelu_9, + matmul_51, + add_69, + assign_113, + floor_17, + divide_17, + multiply_17, + add_70, + layer_norm_69, + layer_norm_70, + layer_norm_71, + reshape_90, + transpose_61, + reshape_91, + reshape_92, + matmul_52, + add_71, + transpose_62, + slice_10, + assign_120, + scale_10, + transpose_63, + matmul_53, + reshape_93, + index_select_10, + transpose_64, + unsqueeze_15, + softmax_10, + transpose_65, + reshape_94, + matmul_54, + add_72, + reshape_95, + transpose_66, + reshape_96, + reshape_97, + full_11, + floor_18, + divide_18, + multiply_18, + add_73, + layer_norm_72, + layer_norm_73, + layer_norm_74, + matmul_55, + add_74, + gelu_10, + matmul_56, + add_75, + assign_122, + floor_19, + divide_19, + multiply_19, + add_76, + layer_norm_75, + layer_norm_76, + layer_norm_77, + reshape_98, + roll_10, + transpose_67, + reshape_99, + reshape_100, + matmul_57, + add_77, + transpose_68, + slice_11, + assign_130, + scale_11, + transpose_69, + matmul_58, + reshape_101, + index_select_11, + transpose_70, + unsqueeze_16, + add_78, + reshape_102, + unsqueeze_17, + add_79, + softmax_11, + transpose_71, + reshape_103, + matmul_59, + add_80, + reshape_104, + transpose_72, + reshape_105, + roll_11, + reshape_106, + full_12, + floor_20, + divide_20, + multiply_20, + add_81, + layer_norm_78, + layer_norm_79, + layer_norm_80, + matmul_60, + add_82, + gelu_11, + matmul_61, + add_83, + assign_133, + floor_21, + divide_21, + multiply_21, + add_84, + layer_norm_81, + layer_norm_82, + layer_norm_83, + reshape_107, + transpose_73, + reshape_108, + reshape_109, + matmul_62, + add_85, + transpose_74, + slice_12, + assign_140, + scale_12, + transpose_75, + matmul_63, + reshape_110, + index_select_12, + transpose_76, + unsqueeze_18, + softmax_12, + transpose_77, + reshape_111, + matmul_64, + add_86, + reshape_112, + transpose_78, + reshape_113, + reshape_114, + full_13, + floor_22, + divide_22, + multiply_22, + add_87, + layer_norm_84, + layer_norm_85, + layer_norm_86, + matmul_65, + add_88, + gelu_12, + matmul_66, + add_89, + assign_142, + floor_23, + divide_23, + multiply_23, + add_90, + layer_norm_87, + layer_norm_88, + layer_norm_89, + reshape_115, + roll_12, + transpose_79, + reshape_116, + reshape_117, + matmul_67, + add_91, + transpose_80, + slice_13, + assign_150, + scale_13, + transpose_81, + matmul_68, + reshape_118, + index_select_13, + transpose_82, + unsqueeze_19, + add_92, + reshape_119, + unsqueeze_20, + add_93, + softmax_13, + transpose_83, + reshape_120, + matmul_69, + add_94, + reshape_121, + transpose_84, + reshape_122, + roll_13, + reshape_123, + full_14, + floor_24, + divide_24, + multiply_24, + add_95, + layer_norm_90, + layer_norm_91, + layer_norm_92, + matmul_70, + add_96, + gelu_13, + matmul_71, + add_97, + assign_153, + floor_25, + divide_25, + multiply_25, + add_98, + layer_norm_93, + layer_norm_94, + layer_norm_95, + reshape_124, + transpose_85, + reshape_125, + reshape_126, + matmul_72, + add_99, + transpose_86, + slice_14, + assign_160, + scale_14, + transpose_87, + matmul_73, + reshape_127, + index_select_14, + transpose_88, + unsqueeze_21, + softmax_14, + transpose_89, + reshape_128, + matmul_74, + add_100, + reshape_129, + transpose_90, + reshape_130, + reshape_131, + full_15, + floor_26, + divide_26, + multiply_26, + add_101, + layer_norm_96, + layer_norm_97, + layer_norm_98, + matmul_75, + add_102, + gelu_14, + matmul_76, + add_103, + assign_162, + floor_27, + divide_27, + multiply_27, + add_104, + layer_norm_99, + layer_norm_100, + layer_norm_101, + reshape_132, + roll_14, + transpose_91, + reshape_133, + reshape_134, + matmul_77, + add_105, + transpose_92, + slice_15, + assign_170, + scale_15, + transpose_93, + matmul_78, + reshape_135, + index_select_15, + transpose_94, + unsqueeze_22, + add_106, + reshape_136, + unsqueeze_23, + add_107, + softmax_15, + transpose_95, + reshape_137, + matmul_79, + add_108, + reshape_138, + transpose_96, + reshape_139, + roll_15, + reshape_140, + full_16, + floor_28, + divide_28, + multiply_28, + add_109, + layer_norm_102, + layer_norm_103, + layer_norm_104, + matmul_80, + add_110, + gelu_15, + matmul_81, + add_111, + assign_173, + floor_29, + divide_29, + multiply_29, + add_112, + layer_norm_105, + layer_norm_106, + layer_norm_107, + reshape_141, + transpose_97, + reshape_142, + reshape_143, + matmul_82, + add_113, + transpose_98, + slice_16, + assign_180, + scale_16, + transpose_99, + matmul_83, + reshape_144, + index_select_16, + transpose_100, + unsqueeze_24, + softmax_16, + transpose_101, + reshape_145, + matmul_84, + add_114, + reshape_146, + transpose_102, + reshape_147, + reshape_148, + full_17, + floor_30, + divide_30, + multiply_30, + add_115, + layer_norm_108, + layer_norm_109, + layer_norm_110, + matmul_85, + add_116, + gelu_16, + matmul_86, + add_117, + assign_182, + floor_31, + divide_31, + multiply_31, + add_118, + layer_norm_111, + layer_norm_112, + layer_norm_113, + reshape_149, + roll_16, + transpose_103, + reshape_150, + reshape_151, + matmul_87, + add_119, + transpose_104, + slice_17, + assign_190, + scale_17, + transpose_105, + matmul_88, + reshape_152, + index_select_17, + transpose_106, + unsqueeze_25, + add_120, + reshape_153, + unsqueeze_26, + add_121, + softmax_17, + transpose_107, + reshape_154, + matmul_89, + add_122, + reshape_155, + transpose_108, + reshape_156, + roll_17, + reshape_157, + full_18, + floor_32, + divide_32, + multiply_32, + add_123, + layer_norm_114, + layer_norm_115, + layer_norm_116, + matmul_90, + add_124, + gelu_17, + matmul_91, + add_125, + assign_193, + floor_33, + divide_33, + multiply_33, + add_126, + layer_norm_117, + layer_norm_118, + layer_norm_119, + reshape_158, + transpose_109, + reshape_159, + reshape_160, + matmul_92, + add_127, + transpose_110, + slice_18, + assign_200, + scale_18, + transpose_111, + matmul_93, + reshape_161, + index_select_18, + transpose_112, + unsqueeze_27, + softmax_18, + transpose_113, + reshape_162, + matmul_94, + add_128, + reshape_163, + transpose_114, + reshape_164, + reshape_165, + full_19, + floor_34, + divide_34, + multiply_34, + add_129, + layer_norm_120, + layer_norm_121, + layer_norm_122, + matmul_95, + add_130, + gelu_18, + matmul_96, + add_131, + assign_202, + floor_35, + divide_35, + multiply_35, + add_132, + layer_norm_123, + layer_norm_124, + layer_norm_125, + reshape_166, + roll_18, + transpose_115, + reshape_167, + reshape_168, + matmul_97, + add_133, + transpose_116, + slice_19, + assign_210, + scale_19, + transpose_117, + matmul_98, + reshape_169, + index_select_19, + transpose_118, + unsqueeze_28, + add_134, + reshape_170, + unsqueeze_29, + add_135, + softmax_19, + transpose_119, + reshape_171, + matmul_99, + add_136, + reshape_172, + transpose_120, + reshape_173, + roll_19, + reshape_174, + full_20, + floor_36, + divide_36, + multiply_36, + add_137, + layer_norm_126, + layer_norm_127, + layer_norm_128, + matmul_100, + add_138, + gelu_19, + matmul_101, + add_139, + assign_213, + floor_37, + divide_37, + multiply_37, + add_140, + layer_norm_129, + layer_norm_130, + layer_norm_131, + reshape_175, + transpose_121, + reshape_176, + reshape_177, + matmul_102, + add_141, + transpose_122, + slice_20, + assign_220, + scale_20, + transpose_123, + matmul_103, + reshape_178, + index_select_20, + transpose_124, + unsqueeze_30, + softmax_20, + transpose_125, + reshape_179, + matmul_104, + add_142, + reshape_180, + transpose_126, + reshape_181, + reshape_182, + full_21, + floor_38, + divide_38, + multiply_38, + add_143, + layer_norm_132, + layer_norm_133, + layer_norm_134, + matmul_105, + add_144, + gelu_20, + matmul_106, + add_145, + assign_222, + floor_39, + divide_39, + multiply_39, + add_146, + layer_norm_135, + layer_norm_136, + layer_norm_137, + reshape_183, + roll_20, + transpose_127, + reshape_184, + reshape_185, + matmul_107, + add_147, + transpose_128, + slice_21, + assign_230, + scale_21, + transpose_129, + matmul_108, + reshape_186, + index_select_21, + transpose_130, + unsqueeze_31, + add_148, + reshape_187, + unsqueeze_32, + add_149, + softmax_21, + transpose_131, + reshape_188, + matmul_109, + add_150, + reshape_189, + transpose_132, + reshape_190, + roll_21, + reshape_191, + full_22, + floor_40, + divide_40, + multiply_40, + add_151, + layer_norm_138, + layer_norm_139, + layer_norm_140, + matmul_110, + add_152, + gelu_21, + matmul_111, + add_153, + assign_233, + floor_41, + divide_41, + multiply_41, + add_154, + reshape_192, + strided_slice_8, + strided_slice_9, + strided_slice_10, + strided_slice_11, + assign_246, + concat_2, + reshape_193, + layer_norm_141, + layer_norm_142, + layer_norm_143, + matmul_112, + layer_norm_144, + layer_norm_145, + layer_norm_146, + reshape_194, + transpose_133, + reshape_195, + reshape_196, + matmul_113, + add_155, + transpose_134, + slice_22, + assign_253, + scale_22, + transpose_135, + matmul_114, + reshape_197, + index_select_22, + transpose_136, + unsqueeze_33, + softmax_22, + transpose_137, + reshape_198, + matmul_115, + add_156, + reshape_199, + transpose_138, + reshape_200, + reshape_201, + full_23, + floor_42, + divide_42, + multiply_42, + add_157, + layer_norm_147, + layer_norm_148, + layer_norm_149, + matmul_116, + add_158, + gelu_22, + matmul_117, + add_159, + assign_255, + floor_43, + divide_43, + multiply_43, + add_160, + layer_norm_150, + layer_norm_151, + layer_norm_152, + reshape_202, + roll_22, + transpose_139, + reshape_203, + reshape_204, + matmul_118, + add_161, + transpose_140, + slice_23, + assign_263, + scale_23, + transpose_141, + matmul_119, + reshape_205, + index_select_23, + transpose_142, + unsqueeze_34, + add_162, + reshape_206, + unsqueeze_35, + add_163, + softmax_23, + transpose_143, + reshape_207, + matmul_120, + add_164, + reshape_208, + transpose_144, + reshape_209, + roll_23, + reshape_210, + full_24, + floor_44, + divide_44, + multiply_44, + add_165, + layer_norm_153, + layer_norm_154, + layer_norm_155, + matmul_121, + add_166, + gelu_23, + matmul_122, + add_167, + assign_266, + floor_45, + divide_45, + multiply_45, + add_168, + layer_norm_156, + layer_norm_157, + transpose_145, + unsqueeze_36, + pool2d_0, + squeeze_0, + flatten_0, + matmul_123, + add_169, + set_value__0, + set_value__1, + set_value__2, + set_value__3, + set_value__4, + set_value__5, + set_value__6, + set_value__7, + set_value__8, + set_value__9, + set_value__10, + set_value__11, + ) diff --git a/paddle_samples/vision-model/SwinTransformer_large_patch4_window7_224/subgraph_0/weight_meta.py b/paddle_samples/vision-model/SwinTransformer_large_patch4_window7_224/subgraph_0/weight_meta.py new file mode 100644 index 00000000..88a6a1a4 --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_large_patch4_window7_224/subgraph_0/weight_meta.py @@ -0,0 +1,2743 @@ +class Program_weight_tensor_parameter_0: + name = "parameter_0" + shape = [102] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_1: + name = "parameter_1" + shape = [1536, 102] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_2: + name = "parameter_2" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_3: + name = "parameter_3" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_4: + name = "parameter_4" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_5: + name = "parameter_5" + shape = [6144, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_6: + name = "parameter_6" + shape = [6144] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_7: + name = "parameter_7" + shape = [1536, 6144] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_8: + name = "parameter_8" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_9: + name = "parameter_9" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_10: + name = "parameter_10" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_11: + name = "parameter_11" + shape = [1536, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_12: + name = "parameter_12" + shape = [4608] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_13: + name = "parameter_13" + shape = [1536, 4608] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_14: + name = "parameter_14" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_15: + name = "parameter_15" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_16: + name = "parameter_16" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_17: + name = "parameter_17" + shape = [6144, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_18: + name = "parameter_18" + shape = [6144] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_19: + name = "parameter_19" + shape = [1536, 6144] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_20: + name = "parameter_20" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_21: + name = "parameter_21" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_22: + name = "parameter_22" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_23: + name = "parameter_23" + shape = [1536, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_24: + name = "parameter_24" + shape = [4608] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_25: + name = "parameter_25" + shape = [1536, 4608] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_26: + name = "parameter_26" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_27: + name = "parameter_27" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_28: + name = "parameter_28" + shape = [3072, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_29: + name = "parameter_29" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_30: + name = "parameter_30" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_31: + name = "parameter_31" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_32: + name = "parameter_32" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_33: + name = "parameter_33" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_34: + name = "parameter_34" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_35: + name = "parameter_35" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_36: + name = "parameter_36" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_37: + name = "parameter_37" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_38: + name = "parameter_38" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_39: + name = "parameter_39" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_40: + name = "parameter_40" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_41: + name = "parameter_41" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_42: + name = "parameter_42" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_43: + name = "parameter_43" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_44: + name = "parameter_44" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_45: + name = "parameter_45" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_46: + name = "parameter_46" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_47: + name = "parameter_47" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_48: + name = "parameter_48" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_49: + name = "parameter_49" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_50: + name = "parameter_50" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_51: + name = "parameter_51" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_52: + name = "parameter_52" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_53: + name = "parameter_53" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_54: + name = "parameter_54" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_55: + name = "parameter_55" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_56: + name = "parameter_56" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_57: + name = "parameter_57" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_58: + name = "parameter_58" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_59: + name = "parameter_59" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_60: + name = "parameter_60" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_61: + name = "parameter_61" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_62: + name = "parameter_62" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_63: + name = "parameter_63" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_64: + name = "parameter_64" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_65: + name = "parameter_65" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_66: + name = "parameter_66" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_67: + name = "parameter_67" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_68: + name = "parameter_68" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_69: + name = "parameter_69" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_70: + name = "parameter_70" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_71: + name = "parameter_71" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_72: + name = "parameter_72" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_73: + name = "parameter_73" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_74: + name = "parameter_74" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_75: + name = "parameter_75" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_76: + name = "parameter_76" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_77: + name = "parameter_77" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_78: + name = "parameter_78" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_79: + name = "parameter_79" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_80: + name = "parameter_80" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_81: + name = "parameter_81" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_82: + name = "parameter_82" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_83: + name = "parameter_83" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_84: + name = "parameter_84" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_85: + name = "parameter_85" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_86: + name = "parameter_86" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_87: + name = "parameter_87" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_88: + name = "parameter_88" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_89: + name = "parameter_89" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_90: + name = "parameter_90" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_91: + name = "parameter_91" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_92: + name = "parameter_92" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_93: + name = "parameter_93" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_94: + name = "parameter_94" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_95: + name = "parameter_95" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_96: + name = "parameter_96" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_97: + name = "parameter_97" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_98: + name = "parameter_98" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_99: + name = "parameter_99" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_100: + name = "parameter_100" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_101: + name = "parameter_101" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_102: + name = "parameter_102" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_103: + name = "parameter_103" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_104: + name = "parameter_104" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_105: + name = "parameter_105" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_106: + name = "parameter_106" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_107: + name = "parameter_107" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_108: + name = "parameter_108" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_109: + name = "parameter_109" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_110: + name = "parameter_110" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_111: + name = "parameter_111" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_112: + name = "parameter_112" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_113: + name = "parameter_113" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_114: + name = "parameter_114" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_115: + name = "parameter_115" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_116: + name = "parameter_116" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_117: + name = "parameter_117" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_118: + name = "parameter_118" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_119: + name = "parameter_119" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_120: + name = "parameter_120" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_121: + name = "parameter_121" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_122: + name = "parameter_122" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_123: + name = "parameter_123" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_124: + name = "parameter_124" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_125: + name = "parameter_125" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_126: + name = "parameter_126" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_127: + name = "parameter_127" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_128: + name = "parameter_128" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_129: + name = "parameter_129" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_130: + name = "parameter_130" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_131: + name = "parameter_131" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_132: + name = "parameter_132" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_133: + name = "parameter_133" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_134: + name = "parameter_134" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_135: + name = "parameter_135" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_136: + name = "parameter_136" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_137: + name = "parameter_137" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_138: + name = "parameter_138" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_139: + name = "parameter_139" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_140: + name = "parameter_140" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_141: + name = "parameter_141" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_142: + name = "parameter_142" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_143: + name = "parameter_143" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_144: + name = "parameter_144" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_145: + name = "parameter_145" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_146: + name = "parameter_146" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_147: + name = "parameter_147" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_148: + name = "parameter_148" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_149: + name = "parameter_149" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_150: + name = "parameter_150" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_151: + name = "parameter_151" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_152: + name = "parameter_152" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_153: + name = "parameter_153" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_154: + name = "parameter_154" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_155: + name = "parameter_155" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_156: + name = "parameter_156" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_157: + name = "parameter_157" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_158: + name = "parameter_158" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_159: + name = "parameter_159" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_160: + name = "parameter_160" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_161: + name = "parameter_161" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_162: + name = "parameter_162" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_163: + name = "parameter_163" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_164: + name = "parameter_164" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_165: + name = "parameter_165" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_166: + name = "parameter_166" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_167: + name = "parameter_167" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_168: + name = "parameter_168" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_169: + name = "parameter_169" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_170: + name = "parameter_170" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_171: + name = "parameter_171" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_172: + name = "parameter_172" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_173: + name = "parameter_173" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_174: + name = "parameter_174" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_175: + name = "parameter_175" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_176: + name = "parameter_176" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_177: + name = "parameter_177" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_178: + name = "parameter_178" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_179: + name = "parameter_179" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_180: + name = "parameter_180" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_181: + name = "parameter_181" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_182: + name = "parameter_182" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_183: + name = "parameter_183" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_184: + name = "parameter_184" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_185: + name = "parameter_185" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_186: + name = "parameter_186" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_187: + name = "parameter_187" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_188: + name = "parameter_188" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_189: + name = "parameter_189" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_190: + name = "parameter_190" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_191: + name = "parameter_191" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_192: + name = "parameter_192" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_193: + name = "parameter_193" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_194: + name = "parameter_194" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_195: + name = "parameter_195" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_196: + name = "parameter_196" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_197: + name = "parameter_197" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_198: + name = "parameter_198" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_199: + name = "parameter_199" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_200: + name = "parameter_200" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_201: + name = "parameter_201" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_202: + name = "parameter_202" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_203: + name = "parameter_203" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_204: + name = "parameter_204" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_205: + name = "parameter_205" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_206: + name = "parameter_206" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_207: + name = "parameter_207" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_208: + name = "parameter_208" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_209: + name = "parameter_209" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_210: + name = "parameter_210" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_211: + name = "parameter_211" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_212: + name = "parameter_212" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_213: + name = "parameter_213" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_214: + name = "parameter_214" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_215: + name = "parameter_215" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_216: + name = "parameter_216" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_217: + name = "parameter_217" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_218: + name = "parameter_218" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_219: + name = "parameter_219" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_220: + name = "parameter_220" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_221: + name = "parameter_221" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_222: + name = "parameter_222" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_223: + name = "parameter_223" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_224: + name = "parameter_224" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_225: + name = "parameter_225" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_226: + name = "parameter_226" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_227: + name = "parameter_227" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_228: + name = "parameter_228" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_229: + name = "parameter_229" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_230: + name = "parameter_230" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_231: + name = "parameter_231" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_232: + name = "parameter_232" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_233: + name = "parameter_233" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_234: + name = "parameter_234" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_235: + name = "parameter_235" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_236: + name = "parameter_236" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_237: + name = "parameter_237" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_238: + name = "parameter_238" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_239: + name = "parameter_239" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_240: + name = "parameter_240" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_241: + name = "parameter_241" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_242: + name = "parameter_242" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_243: + name = "parameter_243" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_244: + name = "parameter_244" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_245: + name = "parameter_245" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_246: + name = "parameter_246" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_247: + name = "parameter_247" + shape = [1536, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_248: + name = "parameter_248" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_249: + name = "parameter_249" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_250: + name = "parameter_250" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_251: + name = "parameter_251" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_252: + name = "parameter_252" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_253: + name = "parameter_253" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_254: + name = "parameter_254" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_255: + name = "parameter_255" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_256: + name = "parameter_256" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_257: + name = "parameter_257" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_258: + name = "parameter_258" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_259: + name = "parameter_259" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_260: + name = "parameter_260" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_261: + name = "parameter_261" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_262: + name = "parameter_262" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_263: + name = "parameter_263" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_264: + name = "parameter_264" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_265: + name = "parameter_265" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_266: + name = "parameter_266" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_267: + name = "parameter_267" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_268: + name = "parameter_268" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_269: + name = "parameter_269" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_270: + name = "parameter_270" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_271: + name = "parameter_271" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_272: + name = "parameter_272" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_273: + name = "parameter_273" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_274: + name = "parameter_274" + shape = [768, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_275: + name = "parameter_275" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_276: + name = "parameter_276" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_277: + name = "parameter_277" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_278: + name = "parameter_278" + shape = [768, 192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_279: + name = "parameter_279" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_280: + name = "parameter_280" + shape = [192, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_281: + name = "parameter_281" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_282: + name = "parameter_282" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_283: + name = "parameter_283" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_284: + name = "parameter_284" + shape = [192, 192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_285: + name = "parameter_285" + shape = [576] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_286: + name = "parameter_286" + shape = [192, 576] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_287: + name = "parameter_287" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_288: + name = "parameter_288" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_289: + name = "parameter_289" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_290: + name = "parameter_290" + shape = [768, 192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_291: + name = "parameter_291" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_292: + name = "parameter_292" + shape = [192, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_293: + name = "parameter_293" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_294: + name = "parameter_294" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_295: + name = "parameter_295" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_296: + name = "parameter_296" + shape = [192, 192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_297: + name = "parameter_297" + shape = [576] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_298: + name = "parameter_298" + shape = [192, 576] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_299: + name = "parameter_299" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_300: + name = "parameter_300" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_301: + name = "parameter_301" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_302: + name = "parameter_302" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_303: + name = "parameter_303" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_304: + name = "parameter_304" + shape = [192, 3, 4, 4] + dtype = "float32" + low = 0 + high = 0.5 + data = None diff --git a/paddle_samples/vision-model/SwinTransformer_large_patch4_window7_224/subgraph_1/graph_hash.txt b/paddle_samples/vision-model/SwinTransformer_large_patch4_window7_224/subgraph_1/graph_hash.txt new file mode 100644 index 00000000..678a18a1 --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_large_patch4_window7_224/subgraph_1/graph_hash.txt @@ -0,0 +1 @@ +b30ca9883649d991bc8c4ba04da935a9b4b73a36613bcc8529ef9a0a2fb8b1d3 \ No newline at end of file diff --git a/paddle_samples/vision-model/SwinTransformer_large_patch4_window7_224/subgraph_1/graph_net.json b/paddle_samples/vision-model/SwinTransformer_large_patch4_window7_224/subgraph_1/graph_net.json new file mode 100644 index 00000000..368ac6fc --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_large_patch4_window7_224/subgraph_1/graph_net.json @@ -0,0 +1,5 @@ +{ + "framework": "paddle", + "num_devices_required": 1, + "num_nodes_required": 1 +} \ No newline at end of file diff --git a/paddle_samples/vision-model/SwinTransformer_large_patch4_window7_224/subgraph_1/input_meta.py b/paddle_samples/vision-model/SwinTransformer_large_patch4_window7_224/subgraph_1/input_meta.py new file mode 100644 index 00000000..34b9496d --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_large_patch4_window7_224/subgraph_1/input_meta.py @@ -0,0 +1,439 @@ +class Program_weight_tensor_data_0: + name = "data_0" + shape = [128, 3, 224, 224] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_1: + name = "data_1" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_2: + name = "data_2" + shape = [169, 6] + dtype = "float32" + low = -10.3312 + high = 6.98733 + data = None + + +class Program_weight_tensor_data_3: + name = "data_3" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_4: + name = "data_4" + shape = [169, 6] + dtype = "float32" + low = -8.58572 + high = 5.95655 + data = None + + +class Program_weight_tensor_data_5: + name = "data_5" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_6: + name = "data_6" + shape = [169, 12] + dtype = "float32" + low = -9.91305 + high = 6.31495 + data = None + + +class Program_weight_tensor_data_7: + name = "data_7" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_8: + name = "data_8" + shape = [169, 12] + dtype = "float32" + low = -8.17213 + high = 5.93846 + data = None + + +class Program_weight_tensor_data_9: + name = "data_9" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_10: + name = "data_10" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_11: + name = "data_11" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_12: + name = "data_12" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_13: + name = "data_13" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_14: + name = "data_14" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_15: + name = "data_15" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_16: + name = "data_16" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_17: + name = "data_17" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_18: + name = "data_18" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_19: + name = "data_19" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_20: + name = "data_20" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_21: + name = "data_21" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_22: + name = "data_22" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_23: + name = "data_23" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_24: + name = "data_24" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_25: + name = "data_25" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_26: + name = "data_26" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_27: + name = "data_27" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_28: + name = "data_28" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_29: + name = "data_29" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_30: + name = "data_30" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_31: + name = "data_31" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_32: + name = "data_32" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_33: + name = "data_33" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_34: + name = "data_34" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_35: + name = "data_35" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_36: + name = "data_36" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_37: + name = "data_37" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_38: + name = "data_38" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_39: + name = "data_39" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_40: + name = "data_40" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_41: + name = "data_41" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_42: + name = "data_42" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_43: + name = "data_43" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_44: + name = "data_44" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_45: + name = "data_45" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_46: + name = "data_46" + shape = [169, 48] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_47: + name = "data_47" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_48: + name = "data_48" + shape = [169, 48] + dtype = "float32" + low = 0 + high = 0.5 + data = None diff --git a/paddle_samples/vision-model/SwinTransformer_large_patch4_window7_224/subgraph_1/model.py b/paddle_samples/vision-model/SwinTransformer_large_patch4_window7_224/subgraph_1/model.py new file mode 100644 index 00000000..09131f42 --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_large_patch4_window7_224/subgraph_1/model.py @@ -0,0 +1,9768 @@ +import paddle + + +class GraphModule(paddle.nn.Layer): + def __init__(self): + super().__init__() + + def forward( + self, + parameter_0, + parameter_1, + parameter_2, + parameter_3, + parameter_4, + parameter_5, + parameter_6, + parameter_7, + parameter_8, + parameter_9, + parameter_10, + parameter_11, + parameter_12, + parameter_13, + parameter_14, + parameter_15, + parameter_16, + parameter_17, + parameter_18, + parameter_19, + parameter_20, + parameter_21, + parameter_22, + parameter_23, + parameter_24, + parameter_25, + parameter_26, + parameter_27, + parameter_28, + parameter_29, + parameter_30, + parameter_31, + parameter_32, + parameter_33, + parameter_34, + parameter_35, + parameter_36, + parameter_37, + parameter_38, + parameter_39, + parameter_40, + parameter_41, + parameter_42, + parameter_43, + parameter_44, + parameter_45, + parameter_46, + parameter_47, + parameter_48, + parameter_49, + parameter_50, + parameter_51, + parameter_52, + parameter_53, + parameter_54, + parameter_55, + parameter_56, + parameter_57, + parameter_58, + parameter_59, + parameter_60, + parameter_61, + parameter_62, + parameter_63, + parameter_64, + parameter_65, + parameter_66, + parameter_67, + parameter_68, + parameter_69, + parameter_70, + parameter_71, + parameter_72, + parameter_73, + parameter_74, + parameter_75, + parameter_76, + parameter_77, + parameter_78, + parameter_79, + parameter_80, + parameter_81, + parameter_82, + parameter_83, + parameter_84, + parameter_85, + parameter_86, + parameter_87, + parameter_88, + parameter_89, + parameter_90, + parameter_91, + parameter_92, + parameter_93, + parameter_94, + parameter_95, + parameter_96, + parameter_97, + parameter_98, + parameter_99, + parameter_100, + parameter_101, + parameter_102, + parameter_103, + parameter_104, + parameter_105, + parameter_106, + parameter_107, + parameter_108, + parameter_109, + parameter_110, + parameter_111, + parameter_112, + parameter_113, + parameter_114, + parameter_115, + parameter_116, + parameter_117, + parameter_118, + parameter_119, + parameter_120, + parameter_121, + parameter_122, + parameter_123, + parameter_124, + parameter_125, + parameter_126, + parameter_127, + parameter_128, + parameter_129, + parameter_130, + parameter_131, + parameter_132, + parameter_133, + parameter_134, + parameter_135, + parameter_136, + parameter_137, + parameter_138, + parameter_139, + parameter_140, + parameter_141, + parameter_142, + parameter_143, + parameter_144, + parameter_145, + parameter_146, + parameter_147, + parameter_148, + parameter_149, + parameter_150, + parameter_151, + parameter_152, + parameter_153, + parameter_154, + parameter_155, + parameter_156, + parameter_157, + parameter_158, + parameter_159, + parameter_160, + parameter_161, + parameter_162, + parameter_163, + parameter_164, + parameter_165, + parameter_166, + parameter_167, + parameter_168, + parameter_169, + parameter_170, + parameter_171, + parameter_172, + parameter_173, + parameter_174, + parameter_175, + parameter_176, + parameter_177, + parameter_178, + parameter_179, + parameter_180, + parameter_181, + parameter_182, + parameter_183, + parameter_184, + parameter_185, + parameter_186, + parameter_187, + parameter_188, + parameter_189, + parameter_190, + parameter_191, + parameter_192, + parameter_193, + parameter_194, + parameter_195, + parameter_196, + parameter_197, + parameter_198, + parameter_199, + parameter_200, + parameter_201, + parameter_202, + parameter_203, + parameter_204, + parameter_205, + parameter_206, + parameter_207, + parameter_208, + parameter_209, + parameter_210, + parameter_211, + parameter_212, + parameter_213, + parameter_214, + parameter_215, + parameter_216, + parameter_217, + parameter_218, + parameter_219, + parameter_220, + parameter_221, + parameter_222, + parameter_223, + parameter_224, + parameter_225, + parameter_226, + parameter_227, + parameter_228, + parameter_229, + parameter_230, + parameter_231, + parameter_232, + parameter_233, + parameter_234, + parameter_235, + parameter_236, + parameter_237, + parameter_238, + parameter_239, + parameter_240, + parameter_241, + parameter_242, + parameter_243, + parameter_244, + parameter_245, + parameter_246, + parameter_247, + parameter_248, + parameter_249, + parameter_250, + parameter_251, + parameter_252, + parameter_253, + parameter_254, + parameter_255, + parameter_256, + parameter_257, + parameter_258, + parameter_259, + parameter_260, + parameter_261, + parameter_262, + parameter_263, + parameter_264, + parameter_265, + parameter_266, + parameter_267, + parameter_268, + parameter_269, + parameter_270, + parameter_271, + parameter_272, + parameter_273, + parameter_274, + parameter_275, + parameter_276, + parameter_277, + parameter_278, + parameter_279, + parameter_280, + parameter_281, + parameter_282, + parameter_283, + parameter_284, + parameter_285, + parameter_286, + parameter_287, + parameter_288, + parameter_289, + parameter_290, + parameter_291, + parameter_292, + parameter_293, + parameter_294, + parameter_295, + parameter_296, + parameter_297, + parameter_298, + parameter_299, + parameter_300, + parameter_301, + parameter_302, + parameter_303, + parameter_304, + data_0, + data_1, + data_2, + data_3, + data_4, + data_5, + data_6, + data_7, + data_8, + data_9, + data_10, + data_11, + data_12, + data_13, + data_14, + data_15, + data_16, + data_17, + data_18, + data_19, + data_20, + data_21, + data_22, + data_23, + data_24, + data_25, + data_26, + data_27, + data_28, + data_29, + data_30, + data_31, + data_32, + data_33, + data_34, + data_35, + data_36, + data_37, + data_38, + data_39, + data_40, + data_41, + data_42, + data_43, + data_44, + data_45, + data_46, + data_47, + data_48, + ): + # pd_op.shape64: (4xi64) <- (-1x3x224x224xf32) + shape64_0 = paddle._C_ops.shape64(data_0) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_0 = [0] + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_1 = [1] + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_0 = paddle._C_ops.slice( + shape64_0, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_0 + + # pd_op.conv2d: (-1x192x56x56xf32) <- (-1x3x224x224xf32, 192x3x4x4xf32) + conv2d_0 = paddle._C_ops.conv2d( + data_0, parameter_304, [4, 4], [0, 0], "EXPLICIT", [1, 1], 1, "NCHW" + ) + del data_0, parameter_304 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_2 = [1, -1, 1, 1] + + # pd_op.reshape: (1x192x1x1xf32) <- (192xf32, 4xi64) + reshape_0 = paddle._C_ops.reshape(parameter_303, full_int_array_2) + del full_int_array_2, parameter_303 + + # pd_op.add: (-1x192x56x56xf32) <- (-1x192x56x56xf32, 1x192x1x1xf32) + add_1 = paddle._C_ops.add(conv2d_0, reshape_0) + del conv2d_0, reshape_0 + + # pd_op.shape64: (4xi64) <- (-1x192x56x56xf32) + shape64_1 = paddle._C_ops.shape64(add_1) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_1 = paddle._C_ops.slice( + shape64_1, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_1 + + # pd_op.flatten: (-1x192x3136xf32) <- (-1x192x56x56xf32) + flatten_0 = paddle._C_ops.flatten(add_1, 2, 3) + del add_1 + + # pd_op.transpose: (-1x3136x192xf32) <- (-1x192x3136xf32) + transpose_0 = paddle._C_ops.transpose(flatten_0, [0, 2, 1]) + del flatten_0 + + # pd_op.layer_norm: (-1x3136x192xf32, -1x3136xf32, -1x3136xf32) <- (-1x3136x192xf32, 192xf32, 192xf32) + layer_norm_0, layer_norm_1, layer_norm_2 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + transpose_0, parameter_302, parameter_301, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_301, parameter_302, transpose_0 + + # pd_op.shape64: (3xi64) <- (-1x3136x192xf32) + shape64_2 = paddle._C_ops.shape64(layer_norm_0) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_2 = paddle._C_ops.slice( + shape64_2, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_2 + + # pd_op.layer_norm: (-1x3136x192xf32, -1x3136xf32, -1x3136xf32) <- (-1x3136x192xf32, 192xf32, 192xf32) + layer_norm_3, layer_norm_4, layer_norm_5 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + layer_norm_0, parameter_300, parameter_299, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_299, parameter_300 + + # pd_op.full: (xi64) <- () + full_0 = paddle._C_ops.full( + [], float("56"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_1 = paddle._C_ops.full( + [], float("192"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_0 = [slice_2, full_0, full_0, full_1] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_0 = paddle._C_ops.stack(combine_0, 0) + del combine_0 + + # pd_op.reshape: (-1x56x56x192xf32) <- (-1x3136x192xf32, 4xi64) + reshape_1 = paddle._C_ops.reshape(layer_norm_3, stack_0) + del layer_norm_3, stack_0 + + # pd_op.shape64: (4xi64) <- (-1x56x56x192xf32) + shape64_3 = paddle._C_ops.shape64(reshape_1) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_3 = paddle._C_ops.slice( + shape64_3, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_3 + + # pd_op.full: (xi64) <- () + full_2 = paddle._C_ops.full( + [], float("8"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_3 = paddle._C_ops.full( + [], float("7"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_1 = [slice_3, full_2, full_3, full_2, full_3, full_1] + del slice_3 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_1 = paddle._C_ops.stack(combine_1, 0) + del combine_1 + + # pd_op.reshape: (-1x8x7x8x7x192xf32) <- (-1x56x56x192xf32, 6xi64) + reshape_2 = paddle._C_ops.reshape(reshape_1, stack_1) + del reshape_1, stack_1 + + # pd_op.transpose: (-1x8x8x7x7x192xf32) <- (-1x8x7x8x7x192xf32) + transpose_1 = paddle._C_ops.transpose(reshape_2, [0, 1, 3, 2, 4, 5]) + del reshape_2 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_3 = [-1, 7, 7, 192] + + # pd_op.reshape: (-1x7x7x192xf32) <- (-1x8x8x7x7x192xf32, 4xi64) + reshape_3 = paddle._C_ops.reshape(transpose_1, full_int_array_3) + del transpose_1 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_4 = [-1, 49, 192] + + # pd_op.reshape: (-1x49x192xf32) <- (-1x7x7x192xf32, 3xi64) + reshape_4 = paddle._C_ops.reshape(reshape_3, full_int_array_4) + del reshape_3 + + # pd_op.shape64: (3xi64) <- (-1x49x192xf32) + shape64_4 = paddle._C_ops.shape64(reshape_4) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_4 = paddle._C_ops.slice( + shape64_4, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_4 + + # pd_op.matmul: (-1x49x576xf32) <- (-1x49x192xf32, 192x576xf32) + matmul_0 = paddle._C_ops.matmul(reshape_4, parameter_298, False, False) + del parameter_298, reshape_4 + + # pd_op.add: (-1x49x576xf32) <- (-1x49x576xf32, 576xf32) + add_2 = paddle._C_ops.add(matmul_0, parameter_297) + del matmul_0, parameter_297 + + # pd_op.full: (xi64) <- () + full_4 = paddle._C_ops.full( + [], float("49"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_5 = paddle._C_ops.full( + [], float("3"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_6 = paddle._C_ops.full( + [], float("6"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_7 = paddle._C_ops.full( + [], float("32"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_2 = [slice_4, full_4, full_5, full_6, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_2 = paddle._C_ops.stack(combine_2, 0) + del combine_2 + + # pd_op.reshape: (-1x49x3x6x32xf32) <- (-1x49x576xf32, 5xi64) + reshape_5 = paddle._C_ops.reshape(add_2, stack_2) + del add_2, stack_2 + + # pd_op.transpose: (3x-1x6x49x32xf32) <- (-1x49x3x6x32xf32) + transpose_2 = paddle._C_ops.transpose(reshape_5, [2, 0, 3, 1, 4]) + del reshape_5 + + # pd_op.slice: (-1x6x49x32xf32) <- (3x-1x6x49x32xf32, 1xi64, 1xi64) + slice_5 = paddle._C_ops.slice( + transpose_2, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_5 = [2] + + # pd_op.slice: (-1x6x49x32xf32) <- (3x-1x6x49x32xf32, 1xi64, 1xi64) + slice_6 = paddle._C_ops.slice( + transpose_2, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_6 = [3] + + # pd_op.slice: (-1x6x49x32xf32) <- (3x-1x6x49x32xf32, 1xi64, 1xi64) + slice_7 = paddle._C_ops.slice( + transpose_2, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_2 + + # pd_op.full: (1xf32) <- () + full_8 = paddle._C_ops.full( + [1], float("0.176777"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (-1x6x49x32xf32) <- (-1x6x49x32xf32, 1xf32) + scale_0 = paddle._C_ops.scale(slice_5, full_8, float("0"), True) + del slice_5 + + # pd_op.transpose: (-1x6x32x49xf32) <- (-1x6x49x32xf32) + transpose_3 = paddle._C_ops.transpose(slice_6, [0, 1, 3, 2]) + del slice_6 + + # pd_op.matmul: (-1x6x49x49xf32) <- (-1x6x49x32xf32, -1x6x32x49xf32) + matmul_1 = paddle._C_ops.matmul(scale_0, transpose_3, False, False) + del scale_0, transpose_3 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_7 = [-1] + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_6 = paddle._C_ops.reshape(data_1, full_int_array_7) + del data_1 + + # pd_op.index_select: (2401x6xf32) <- (169x6xf32, 2401xi64) + index_select_0 = paddle._C_ops.index_select(data_2, reshape_6, 0) + del data_2, reshape_6 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_8 = [49, 49, -1] + + # pd_op.reshape: (49x49x6xf32) <- (2401x6xf32, 3xi64) + reshape_7 = paddle._C_ops.reshape(index_select_0, full_int_array_8) + del index_select_0 + + # pd_op.transpose: (6x49x49xf32) <- (49x49x6xf32) + transpose_4 = paddle._C_ops.transpose(reshape_7, [2, 0, 1]) + del reshape_7 + + # pd_op.unsqueeze: (1x6x49x49xf32) <- (6x49x49xf32, 1xi64) + unsqueeze_0 = paddle._C_ops.unsqueeze(transpose_4, full_int_array_0) + del transpose_4 + + # pd_op.add: (-1x6x49x49xf32) <- (-1x6x49x49xf32, 1x6x49x49xf32) + add_3 = paddle._C_ops.add(matmul_1, unsqueeze_0) + del matmul_1, unsqueeze_0 + + # pd_op.softmax: (-1x6x49x49xf32) <- (-1x6x49x49xf32) + softmax_0 = paddle._C_ops.softmax(add_3, -1) + del add_3 + + # pd_op.matmul: (-1x6x49x32xf32) <- (-1x6x49x49xf32, -1x6x49x32xf32) + matmul_2 = paddle._C_ops.matmul(softmax_0, slice_7, False, False) + del slice_7, softmax_0 + + # pd_op.transpose: (-1x49x6x32xf32) <- (-1x6x49x32xf32) + transpose_5 = paddle._C_ops.transpose(matmul_2, [0, 2, 1, 3]) + del matmul_2 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_3 = [slice_4, full_4, full_1] + del slice_4 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_3 = paddle._C_ops.stack(combine_3, 0) + del combine_3 + + # pd_op.reshape: (-1x49x192xf32) <- (-1x49x6x32xf32, 3xi64) + reshape_8 = paddle._C_ops.reshape(transpose_5, stack_3) + del stack_3, transpose_5 + + # pd_op.matmul: (-1x49x192xf32) <- (-1x49x192xf32, 192x192xf32) + matmul_3 = paddle._C_ops.matmul(reshape_8, parameter_296, False, False) + del parameter_296, reshape_8 + + # pd_op.add: (-1x49x192xf32) <- (-1x49x192xf32, 192xf32) + add_4 = paddle._C_ops.add(matmul_3, parameter_295) + del matmul_3, parameter_295 + + # pd_op.reshape: (-1x7x7x192xf32) <- (-1x49x192xf32, 4xi64) + reshape_9 = paddle._C_ops.reshape(add_4, full_int_array_3) + del add_4 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_9 = [-1, 8, 8, 7, 7, 192] + + # pd_op.reshape: (-1x8x8x7x7x192xf32) <- (-1x7x7x192xf32, 6xi64) + reshape_10 = paddle._C_ops.reshape(reshape_9, full_int_array_9) + del reshape_9 + + # pd_op.transpose: (-1x8x7x8x7x192xf32) <- (-1x8x8x7x7x192xf32) + transpose_6 = paddle._C_ops.transpose(reshape_10, [0, 1, 3, 2, 4, 5]) + del reshape_10 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_10 = [-1, 56, 56, 192] + + # pd_op.reshape: (-1x56x56x192xf32) <- (-1x8x7x8x7x192xf32, 4xi64) + reshape_11 = paddle._C_ops.reshape(transpose_6, full_int_array_10) + del transpose_6 + + # pd_op.full: (xi64) <- () + full_9 = paddle._C_ops.full( + [], float("3136"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_4 = [slice_2, full_9, full_1] + del slice_2 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_4 = paddle._C_ops.stack(combine_4, 0) + del combine_4 + + # pd_op.reshape: (-1x3136x192xf32) <- (-1x56x56x192xf32, 3xi64) + reshape_12 = paddle._C_ops.reshape(reshape_11, stack_4) + del reshape_11, stack_4 + + # pd_op.add: (-1x3136x192xf32) <- (-1x3136x192xf32, -1x3136x192xf32) + add_5 = paddle._C_ops.add(layer_norm_0, reshape_12) + del layer_norm_0, reshape_12 + + # pd_op.layer_norm: (-1x3136x192xf32, -1x3136xf32, -1x3136xf32) <- (-1x3136x192xf32, 192xf32, 192xf32) + layer_norm_6, layer_norm_7, layer_norm_8 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_5, parameter_294, parameter_293, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_293, parameter_294 + + # pd_op.matmul: (-1x3136x768xf32) <- (-1x3136x192xf32, 192x768xf32) + matmul_4 = paddle._C_ops.matmul(layer_norm_6, parameter_292, False, False) + del layer_norm_6, parameter_292 + + # pd_op.add: (-1x3136x768xf32) <- (-1x3136x768xf32, 768xf32) + add_6 = paddle._C_ops.add(matmul_4, parameter_291) + del matmul_4, parameter_291 + + # pd_op.gelu: (-1x3136x768xf32) <- (-1x3136x768xf32) + gelu_0 = paddle._C_ops.gelu(add_6, False) + del add_6 + + # pd_op.matmul: (-1x3136x192xf32) <- (-1x3136x768xf32, 768x192xf32) + matmul_5 = paddle._C_ops.matmul(gelu_0, parameter_290, False, False) + del gelu_0, parameter_290 + + # pd_op.add: (-1x3136x192xf32) <- (-1x3136x192xf32, 192xf32) + add_7 = paddle._C_ops.add(matmul_5, parameter_289) + del matmul_5, parameter_289 + + # pd_op.add: (-1x3136x192xf32) <- (-1x3136x192xf32, -1x3136x192xf32) + add_8 = paddle._C_ops.add(add_5, add_7) + del add_5, add_7 + + # pd_op.shape64: (3xi64) <- (-1x3136x192xf32) + shape64_5 = paddle._C_ops.shape64(add_8) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_8 = paddle._C_ops.slice( + shape64_5, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_5 + + # pd_op.layer_norm: (-1x3136x192xf32, -1x3136xf32, -1x3136xf32) <- (-1x3136x192xf32, 192xf32, 192xf32) + layer_norm_9, layer_norm_10, layer_norm_11 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_8, parameter_288, parameter_287, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_287, parameter_288 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_5 = [slice_8, full_0, full_0, full_1] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_5 = paddle._C_ops.stack(combine_5, 0) + del combine_5 + + # pd_op.reshape: (-1x56x56x192xf32) <- (-1x3136x192xf32, 4xi64) + reshape_13 = paddle._C_ops.reshape(layer_norm_9, stack_5) + del layer_norm_9, stack_5 + + # pd_op.shape64: (4xi64) <- (-1x56x56x192xf32) + shape64_6 = paddle._C_ops.shape64(reshape_13) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_9 = paddle._C_ops.slice( + shape64_6, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_6 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_11 = [-3, -3] + + # pd_op.roll: (-1x56x56x192xf32) <- (-1x56x56x192xf32, 2xi64) + roll_0 = paddle._C_ops.roll(reshape_13, full_int_array_11, [1, 2]) + del reshape_13 + + # pd_op.shape64: (4xi64) <- (-1x56x56x192xf32) + shape64_7 = paddle._C_ops.shape64(roll_0) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_10 = paddle._C_ops.slice( + shape64_7, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_7 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_6 = [slice_10, full_2, full_3, full_2, full_3, full_1] + del full_2, slice_10 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_6 = paddle._C_ops.stack(combine_6, 0) + del combine_6 + + # pd_op.reshape: (-1x8x7x8x7x192xf32) <- (-1x56x56x192xf32, 6xi64) + reshape_14 = paddle._C_ops.reshape(roll_0, stack_6) + del roll_0, stack_6 + + # pd_op.transpose: (-1x8x8x7x7x192xf32) <- (-1x8x7x8x7x192xf32) + transpose_7 = paddle._C_ops.transpose(reshape_14, [0, 1, 3, 2, 4, 5]) + del reshape_14 + + # pd_op.reshape: (-1x7x7x192xf32) <- (-1x8x8x7x7x192xf32, 4xi64) + reshape_15 = paddle._C_ops.reshape(transpose_7, full_int_array_3) + del transpose_7 + + # pd_op.reshape: (-1x49x192xf32) <- (-1x7x7x192xf32, 3xi64) + reshape_16 = paddle._C_ops.reshape(reshape_15, full_int_array_4) + del full_int_array_4, reshape_15 + + # pd_op.full: (1x56x56x1xf32) <- () + full_10 = paddle._C_ops.full( + [1, 56, 56, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_12 = [0, 0] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_13 = [-7, -7] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_14 = [1, 1] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__12 = paddle._C_ops.set_value_( + full_10, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_10 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_15 = [0, -7] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_16 = [-7, -3] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__13 = paddle._C_ops.set_value_( + set_value__12, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__12 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_17 = [0, -3] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_18 = [-7, 2147483647] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__14 = paddle._C_ops.set_value_( + set_value__13, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__13 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_19 = [-7, 0] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_20 = [-3, -7] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__15 = paddle._C_ops.set_value_( + set_value__14, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__14 + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__16 = paddle._C_ops.set_value_( + set_value__15, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__15 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_21 = [-3, 2147483647] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__17 = paddle._C_ops.set_value_( + set_value__16, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__16 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_22 = [-3, 0] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_23 = [2147483647, -7] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__18 = paddle._C_ops.set_value_( + set_value__17, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__17 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_24 = [2147483647, -3] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__19 = paddle._C_ops.set_value_( + set_value__18, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__18 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_25 = [2147483647, 2147483647] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__0 = paddle._C_ops.set_value_( + set_value__19, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__19 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_26 = [1, 8, 7, 8, 7, 1] + + # pd_op.reshape: (1x8x7x8x7x1xf32) <- (1x56x56x1xf32, 6xi64) + reshape_17 = paddle._C_ops.reshape(set_value__0, full_int_array_26) + del full_int_array_26 + + # pd_op.transpose: (1x8x8x7x7x1xf32) <- (1x8x7x8x7x1xf32) + transpose_8 = paddle._C_ops.transpose(reshape_17, [0, 1, 3, 2, 4, 5]) + del reshape_17 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_27 = [-1, 7, 7, 1] + + # pd_op.reshape: (64x7x7x1xf32) <- (1x8x8x7x7x1xf32, 4xi64) + reshape_18 = paddle._C_ops.reshape(transpose_8, full_int_array_27) + del transpose_8 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_28 = [-1, 49] + + # pd_op.reshape: (64x49xf32) <- (64x7x7x1xf32, 2xi64) + reshape_19 = paddle._C_ops.reshape(reshape_18, full_int_array_28) + del reshape_18 + + # pd_op.unsqueeze: (64x1x49xf32) <- (64x49xf32, 1xi64) + unsqueeze_1 = paddle._C_ops.unsqueeze(reshape_19, full_int_array_1) + + # pd_op.unsqueeze: (64x49x1xf32) <- (64x49xf32, 1xi64) + unsqueeze_2 = paddle._C_ops.unsqueeze(reshape_19, full_int_array_5) + del reshape_19 + + # pd_op.subtract: (64x49x49xf32) <- (64x1x49xf32, 64x49x1xf32) + subtract_0 = paddle._C_ops.subtract(unsqueeze_1, unsqueeze_2) + del unsqueeze_1, unsqueeze_2 + + # pd_op.full: (xf32) <- () + full_11 = paddle._C_ops.full( + [], float("0"), paddle.float32, paddle.framework._current_expected_place() + ) + + # pd_op.not_equal: (64x49x49xb) <- (64x49x49xf32, xf32) + not_equal_0 = paddle._C_ops.not_equal(subtract_0, full_11) + + # pd_op.full: (64x49x49xf32) <- () + full_12 = paddle._C_ops.full( + [64, 49, 49], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (64x49x49xf32) <- (64x49x49xb, 64x49x49xf32, 64x49x49xf32) + where_0 = paddle._C_ops.where(not_equal_0, full_12, subtract_0) + del full_12, not_equal_0, subtract_0 + + # pd_op.equal: (64x49x49xb) <- (64x49x49xf32, xf32) + equal_0 = paddle._C_ops.equal(where_0, full_11) + + # pd_op.full: (64x49x49xf32) <- () + full_13 = paddle._C_ops.full( + [64, 49, 49], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (64x49x49xf32) <- (64x49x49xb, 64x49x49xf32, 64x49x49xf32) + where_1 = paddle._C_ops.where(equal_0, full_13, where_0) + del equal_0, full_13, where_0 + + # pd_op.shape64: (3xi64) <- (-1x49x192xf32) + shape64_8 = paddle._C_ops.shape64(reshape_16) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_11 = paddle._C_ops.slice( + shape64_8, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_8 + + # pd_op.matmul: (-1x49x576xf32) <- (-1x49x192xf32, 192x576xf32) + matmul_6 = paddle._C_ops.matmul(reshape_16, parameter_286, False, False) + del parameter_286, reshape_16 + + # pd_op.add: (-1x49x576xf32) <- (-1x49x576xf32, 576xf32) + add_9 = paddle._C_ops.add(matmul_6, parameter_285) + del matmul_6, parameter_285 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_7 = [slice_11, full_4, full_5, full_6, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_7 = paddle._C_ops.stack(combine_7, 0) + del combine_7 + + # pd_op.reshape: (-1x49x3x6x32xf32) <- (-1x49x576xf32, 5xi64) + reshape_20 = paddle._C_ops.reshape(add_9, stack_7) + del add_9, stack_7 + + # pd_op.transpose: (3x-1x6x49x32xf32) <- (-1x49x3x6x32xf32) + transpose_9 = paddle._C_ops.transpose(reshape_20, [2, 0, 3, 1, 4]) + del reshape_20 + + # pd_op.slice: (-1x6x49x32xf32) <- (3x-1x6x49x32xf32, 1xi64, 1xi64) + slice_12 = paddle._C_ops.slice( + transpose_9, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x6x49x32xf32) <- (3x-1x6x49x32xf32, 1xi64, 1xi64) + slice_13 = paddle._C_ops.slice( + transpose_9, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x6x49x32xf32) <- (3x-1x6x49x32xf32, 1xi64, 1xi64) + slice_14 = paddle._C_ops.slice( + transpose_9, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_9 + + # pd_op.scale: (-1x6x49x32xf32) <- (-1x6x49x32xf32, 1xf32) + scale_1 = paddle._C_ops.scale(slice_12, full_8, float("0"), True) + del slice_12 + + # pd_op.transpose: (-1x6x32x49xf32) <- (-1x6x49x32xf32) + transpose_10 = paddle._C_ops.transpose(slice_13, [0, 1, 3, 2]) + del slice_13 + + # pd_op.matmul: (-1x6x49x49xf32) <- (-1x6x49x32xf32, -1x6x32x49xf32) + matmul_7 = paddle._C_ops.matmul(scale_1, transpose_10, False, False) + del scale_1, transpose_10 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_21 = paddle._C_ops.reshape(data_3, full_int_array_7) + del data_3 + + # pd_op.index_select: (2401x6xf32) <- (169x6xf32, 2401xi64) + index_select_1 = paddle._C_ops.index_select(data_4, reshape_21, 0) + del data_4, reshape_21 + + # pd_op.reshape: (49x49x6xf32) <- (2401x6xf32, 3xi64) + reshape_22 = paddle._C_ops.reshape(index_select_1, full_int_array_8) + del index_select_1 + + # pd_op.transpose: (6x49x49xf32) <- (49x49x6xf32) + transpose_11 = paddle._C_ops.transpose(reshape_22, [2, 0, 1]) + del reshape_22 + + # pd_op.unsqueeze: (1x6x49x49xf32) <- (6x49x49xf32, 1xi64) + unsqueeze_3 = paddle._C_ops.unsqueeze(transpose_11, full_int_array_0) + del transpose_11 + + # pd_op.add: (-1x6x49x49xf32) <- (-1x6x49x49xf32, 1x6x49x49xf32) + add_10 = paddle._C_ops.add(matmul_7, unsqueeze_3) + del matmul_7, unsqueeze_3 + + # pd_op.full: (xi64) <- () + full_14 = paddle._C_ops.full( + [], float("64"), paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_0 = paddle._C_ops.floor_divide(slice_11, full_14) + del full_14 + + # pd_op.full: (xi64) <- () + full_15 = paddle._C_ops.full( + [], float("64"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_8 = [floor_divide_0, full_15, full_6, full_4, full_4] + del floor_divide_0, full_15 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_8 = paddle._C_ops.stack(combine_8, 0) + del combine_8 + + # pd_op.reshape: (-1x64x6x49x49xf32) <- (-1x6x49x49xf32, 5xi64) + reshape_23 = paddle._C_ops.reshape(add_10, stack_8) + del add_10, stack_8 + + # pd_op.unsqueeze: (64x1x49x49xf32) <- (64x49x49xf32, 1xi64) + unsqueeze_4 = paddle._C_ops.unsqueeze(where_1, full_int_array_1) + del where_1 + + # pd_op.unsqueeze: (1x64x1x49x49xf32) <- (64x1x49x49xf32, 1xi64) + unsqueeze_5 = paddle._C_ops.unsqueeze(unsqueeze_4, full_int_array_0) + del unsqueeze_4 + + # pd_op.add: (-1x64x6x49x49xf32) <- (-1x64x6x49x49xf32, 1x64x1x49x49xf32) + add_11 = paddle._C_ops.add(reshape_23, unsqueeze_5) + del reshape_23, unsqueeze_5 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_9 = [slice_11, full_6, full_4, full_4] + del full_6 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_9 = paddle._C_ops.stack(combine_9, 0) + del combine_9 + + # pd_op.reshape: (-1x6x49x49xf32) <- (-1x64x6x49x49xf32, 4xi64) + reshape_24 = paddle._C_ops.reshape(add_11, stack_9) + del add_11, stack_9 + + # pd_op.softmax: (-1x6x49x49xf32) <- (-1x6x49x49xf32) + softmax_1 = paddle._C_ops.softmax(reshape_24, -1) + del reshape_24 + + # pd_op.matmul: (-1x6x49x32xf32) <- (-1x6x49x49xf32, -1x6x49x32xf32) + matmul_8 = paddle._C_ops.matmul(softmax_1, slice_14, False, False) + del slice_14, softmax_1 + + # pd_op.transpose: (-1x49x6x32xf32) <- (-1x6x49x32xf32) + transpose_12 = paddle._C_ops.transpose(matmul_8, [0, 2, 1, 3]) + del matmul_8 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_10 = [slice_11, full_4, full_1] + del slice_11 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_10 = paddle._C_ops.stack(combine_10, 0) + del combine_10 + + # pd_op.reshape: (-1x49x192xf32) <- (-1x49x6x32xf32, 3xi64) + reshape_25 = paddle._C_ops.reshape(transpose_12, stack_10) + del stack_10, transpose_12 + + # pd_op.matmul: (-1x49x192xf32) <- (-1x49x192xf32, 192x192xf32) + matmul_9 = paddle._C_ops.matmul(reshape_25, parameter_284, False, False) + del parameter_284, reshape_25 + + # pd_op.add: (-1x49x192xf32) <- (-1x49x192xf32, 192xf32) + add_12 = paddle._C_ops.add(matmul_9, parameter_283) + del matmul_9, parameter_283 + + # pd_op.reshape: (-1x7x7x192xf32) <- (-1x49x192xf32, 4xi64) + reshape_26 = paddle._C_ops.reshape(add_12, full_int_array_3) + del add_12, full_int_array_3 + + # pd_op.reshape: (-1x8x8x7x7x192xf32) <- (-1x7x7x192xf32, 6xi64) + reshape_27 = paddle._C_ops.reshape(reshape_26, full_int_array_9) + del full_int_array_9, reshape_26 + + # pd_op.transpose: (-1x8x7x8x7x192xf32) <- (-1x8x8x7x7x192xf32) + transpose_13 = paddle._C_ops.transpose(reshape_27, [0, 1, 3, 2, 4, 5]) + del reshape_27 + + # pd_op.reshape: (-1x56x56x192xf32) <- (-1x8x7x8x7x192xf32, 4xi64) + reshape_28 = paddle._C_ops.reshape(transpose_13, full_int_array_10) + del full_int_array_10, transpose_13 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_29 = [3, 3] + + # pd_op.roll: (-1x56x56x192xf32) <- (-1x56x56x192xf32, 2xi64) + roll_1 = paddle._C_ops.roll(reshape_28, full_int_array_29, [1, 2]) + del reshape_28 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_11 = [slice_8, full_9, full_1] + del full_9, slice_8 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_11 = paddle._C_ops.stack(combine_11, 0) + del combine_11 + + # pd_op.reshape: (-1x3136x192xf32) <- (-1x56x56x192xf32, 3xi64) + reshape_29 = paddle._C_ops.reshape(roll_1, stack_11) + del roll_1, stack_11 + + # pd_op.add: (-1x3136x192xf32) <- (-1x3136x192xf32, -1x3136x192xf32) + add_13 = paddle._C_ops.add(add_8, reshape_29) + del add_8, reshape_29 + + # pd_op.layer_norm: (-1x3136x192xf32, -1x3136xf32, -1x3136xf32) <- (-1x3136x192xf32, 192xf32, 192xf32) + layer_norm_12, layer_norm_13, layer_norm_14 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_13, parameter_282, parameter_281, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_281, parameter_282 + + # pd_op.matmul: (-1x3136x768xf32) <- (-1x3136x192xf32, 192x768xf32) + matmul_10 = paddle._C_ops.matmul(layer_norm_12, parameter_280, False, False) + del layer_norm_12, parameter_280 + + # pd_op.add: (-1x3136x768xf32) <- (-1x3136x768xf32, 768xf32) + add_14 = paddle._C_ops.add(matmul_10, parameter_279) + del matmul_10, parameter_279 + + # pd_op.gelu: (-1x3136x768xf32) <- (-1x3136x768xf32) + gelu_1 = paddle._C_ops.gelu(add_14, False) + del add_14 + + # pd_op.matmul: (-1x3136x192xf32) <- (-1x3136x768xf32, 768x192xf32) + matmul_11 = paddle._C_ops.matmul(gelu_1, parameter_278, False, False) + del gelu_1, parameter_278 + + # pd_op.add: (-1x3136x192xf32) <- (-1x3136x192xf32, 192xf32) + add_15 = paddle._C_ops.add(matmul_11, parameter_277) + del matmul_11, parameter_277 + + # pd_op.add: (-1x3136x192xf32) <- (-1x3136x192xf32, -1x3136x192xf32) + add_16 = paddle._C_ops.add(add_13, add_15) + del add_13, add_15 + + # pd_op.shape64: (3xi64) <- (-1x3136x192xf32) + shape64_9 = paddle._C_ops.shape64(add_16) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_15 = paddle._C_ops.slice( + shape64_9, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_9 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_12 = [slice_15, full_0, full_0, full_1] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_12 = paddle._C_ops.stack(combine_12, 0) + del combine_12 + + # pd_op.reshape: (-1x56x56x192xf32) <- (-1x3136x192xf32, 4xi64) + reshape_30 = paddle._C_ops.reshape(add_16, stack_12) + del add_16, stack_12 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_30 = [2, 2] + + # pd_op.strided_slice: (-1x28x28x192xf32) <- (-1x56x56x192xf32, 2xi64, 2xi64, 2xi64) + strided_slice_0 = paddle._C_ops.strided_slice( + reshape_30, [1, 2], full_int_array_12, full_int_array_25, full_int_array_30 + ) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_31 = [1, 0] + + # pd_op.strided_slice: (-1x28x28x192xf32) <- (-1x56x56x192xf32, 2xi64, 2xi64, 2xi64) + strided_slice_1 = paddle._C_ops.strided_slice( + reshape_30, [1, 2], full_int_array_31, full_int_array_25, full_int_array_30 + ) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_32 = [0, 1] + + # pd_op.strided_slice: (-1x28x28x192xf32) <- (-1x56x56x192xf32, 2xi64, 2xi64, 2xi64) + strided_slice_2 = paddle._C_ops.strided_slice( + reshape_30, [1, 2], full_int_array_32, full_int_array_25, full_int_array_30 + ) + + # pd_op.strided_slice: (-1x28x28x192xf32) <- (-1x56x56x192xf32, 2xi64, 2xi64, 2xi64) + strided_slice_3 = paddle._C_ops.strided_slice( + reshape_30, [1, 2], full_int_array_14, full_int_array_25, full_int_array_30 + ) + + # pd_op.shape64: (4xi64) <- (-1x56x56x192xf32) + shape64_10 = paddle._C_ops.shape64(reshape_30) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_16 = paddle._C_ops.slice( + shape64_10, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_10 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_13 = [slice_16, full_0, full_0, full_1] + del full_0, full_1, slice_16 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_13 = paddle._C_ops.stack(combine_13, 0) + del combine_13 + + # pd_op.reshape: (-1x56x56x192xf32) <- (-1x56x56x192xf32, 4xi64) + reshape_31 = paddle._C_ops.reshape(reshape_30, stack_13) + del reshape_30, stack_13 + + # pd_op.full: (1xi32) <- () + full_16 = paddle._C_ops.full( + [1], float("-1"), paddle.int32, paddle.core.CPUPlace() + ) + + # builtin.combine: ([-1x28x28x192xf32, -1x28x28x192xf32, -1x28x28x192xf32, -1x28x28x192xf32]) <- (-1x28x28x192xf32, -1x28x28x192xf32, -1x28x28x192xf32, -1x28x28x192xf32) + combine_14 = [ + strided_slice_0, + strided_slice_1, + strided_slice_2, + strided_slice_3, + ] + del strided_slice_0, strided_slice_1, strided_slice_2, strided_slice_3 + + # pd_op.concat: (-1x28x28x768xf32) <- ([-1x28x28x192xf32, -1x28x28x192xf32, -1x28x28x192xf32, -1x28x28x192xf32], 1xi32) + concat_0 = paddle._C_ops.concat(combine_14, full_16) + del combine_14 + + # pd_op.full: (xi64) <- () + full_17 = paddle._C_ops.full( + [], float("-1"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_18 = paddle._C_ops.full( + [], float("768"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_15 = [slice_15, full_17, full_18] + del slice_15 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_14 = paddle._C_ops.stack(combine_15, 0) + del combine_15 + + # pd_op.reshape: (-1x-1x768xf32) <- (-1x28x28x768xf32, 3xi64) + reshape_32 = paddle._C_ops.reshape(concat_0, stack_14) + del concat_0, stack_14 + + # pd_op.layer_norm: (-1x-1x768xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x768xf32, 768xf32, 768xf32) + layer_norm_15, layer_norm_16, layer_norm_17 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + reshape_32, parameter_276, parameter_275, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_275, parameter_276, reshape_32 + + # pd_op.matmul: (-1x-1x384xf32) <- (-1x-1x768xf32, 768x384xf32) + matmul_12 = paddle._C_ops.matmul(layer_norm_15, parameter_274, False, False) + del layer_norm_15, parameter_274 + + # pd_op.shape64: (3xi64) <- (-1x-1x384xf32) + shape64_11 = paddle._C_ops.shape64(matmul_12) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_17 = paddle._C_ops.slice( + shape64_11, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_11 + + # pd_op.shape64: (3xi64) <- (-1x-1x384xf32) + shape64_12 = paddle._C_ops.shape64(matmul_12) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_18 = paddle._C_ops.slice( + shape64_12, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + del shape64_12 + + # pd_op.layer_norm: (-1x-1x384xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x384xf32, 384xf32, 384xf32) + layer_norm_18, layer_norm_19, layer_norm_20 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + matmul_12, parameter_273, parameter_272, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_272, parameter_273 + + # pd_op.full: (xi64) <- () + full_19 = paddle._C_ops.full( + [], float("28"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_20 = paddle._C_ops.full( + [], float("384"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_16 = [slice_17, full_19, full_19, full_20] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_15 = paddle._C_ops.stack(combine_16, 0) + del combine_16 + + # pd_op.reshape: (-1x28x28x384xf32) <- (-1x-1x384xf32, 4xi64) + reshape_33 = paddle._C_ops.reshape(layer_norm_18, stack_15) + del layer_norm_18, stack_15 + + # pd_op.shape64: (4xi64) <- (-1x28x28x384xf32) + shape64_13 = paddle._C_ops.shape64(reshape_33) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_19 = paddle._C_ops.slice( + shape64_13, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_13 + + # pd_op.full: (xi64) <- () + full_21 = paddle._C_ops.full( + [], float("4"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_17 = [slice_19, full_21, full_3, full_21, full_3, full_20] + del slice_19 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_16 = paddle._C_ops.stack(combine_17, 0) + del combine_17 + + # pd_op.reshape: (-1x4x7x4x7x384xf32) <- (-1x28x28x384xf32, 6xi64) + reshape_34 = paddle._C_ops.reshape(reshape_33, stack_16) + del reshape_33, stack_16 + + # pd_op.transpose: (-1x4x4x7x7x384xf32) <- (-1x4x7x4x7x384xf32) + transpose_14 = paddle._C_ops.transpose(reshape_34, [0, 1, 3, 2, 4, 5]) + del reshape_34 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_33 = [-1, 7, 7, 384] + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x4x4x7x7x384xf32, 4xi64) + reshape_35 = paddle._C_ops.reshape(transpose_14, full_int_array_33) + del transpose_14 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_34 = [-1, 49, 384] + + # pd_op.reshape: (-1x49x384xf32) <- (-1x7x7x384xf32, 3xi64) + reshape_36 = paddle._C_ops.reshape(reshape_35, full_int_array_34) + del reshape_35 + + # pd_op.shape64: (3xi64) <- (-1x49x384xf32) + shape64_14 = paddle._C_ops.shape64(reshape_36) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_20 = paddle._C_ops.slice( + shape64_14, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_14 + + # pd_op.matmul: (-1x49x1152xf32) <- (-1x49x384xf32, 384x1152xf32) + matmul_13 = paddle._C_ops.matmul(reshape_36, parameter_271, False, False) + del parameter_271, reshape_36 + + # pd_op.add: (-1x49x1152xf32) <- (-1x49x1152xf32, 1152xf32) + add_17 = paddle._C_ops.add(matmul_13, parameter_270) + del matmul_13, parameter_270 + + # pd_op.full: (xi64) <- () + full_22 = paddle._C_ops.full( + [], float("12"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_18 = [slice_20, full_4, full_5, full_22, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_17 = paddle._C_ops.stack(combine_18, 0) + del combine_18 + + # pd_op.reshape: (-1x49x3x12x32xf32) <- (-1x49x1152xf32, 5xi64) + reshape_37 = paddle._C_ops.reshape(add_17, stack_17) + del add_17, stack_17 + + # pd_op.transpose: (3x-1x12x49x32xf32) <- (-1x49x3x12x32xf32) + transpose_15 = paddle._C_ops.transpose(reshape_37, [2, 0, 3, 1, 4]) + del reshape_37 + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_21 = paddle._C_ops.slice( + transpose_15, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_22 = paddle._C_ops.slice( + transpose_15, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_23 = paddle._C_ops.slice( + transpose_15, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_15 + + # pd_op.scale: (-1x12x49x32xf32) <- (-1x12x49x32xf32, 1xf32) + scale_2 = paddle._C_ops.scale(slice_21, full_8, float("0"), True) + del slice_21 + + # pd_op.transpose: (-1x12x32x49xf32) <- (-1x12x49x32xf32) + transpose_16 = paddle._C_ops.transpose(slice_22, [0, 1, 3, 2]) + del slice_22 + + # pd_op.matmul: (-1x12x49x49xf32) <- (-1x12x49x32xf32, -1x12x32x49xf32) + matmul_14 = paddle._C_ops.matmul(scale_2, transpose_16, False, False) + del scale_2, transpose_16 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_38 = paddle._C_ops.reshape(data_5, full_int_array_7) + del data_5 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_2 = paddle._C_ops.index_select(data_6, reshape_38, 0) + del data_6, reshape_38 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_39 = paddle._C_ops.reshape(index_select_2, full_int_array_8) + del index_select_2 + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_17 = paddle._C_ops.transpose(reshape_39, [2, 0, 1]) + del reshape_39 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_6 = paddle._C_ops.unsqueeze(transpose_17, full_int_array_0) + del transpose_17 + + # pd_op.add: (-1x12x49x49xf32) <- (-1x12x49x49xf32, 1x12x49x49xf32) + add_18 = paddle._C_ops.add(matmul_14, unsqueeze_6) + del matmul_14, unsqueeze_6 + + # pd_op.softmax: (-1x12x49x49xf32) <- (-1x12x49x49xf32) + softmax_2 = paddle._C_ops.softmax(add_18, -1) + del add_18 + + # pd_op.matmul: (-1x12x49x32xf32) <- (-1x12x49x49xf32, -1x12x49x32xf32) + matmul_15 = paddle._C_ops.matmul(softmax_2, slice_23, False, False) + del slice_23, softmax_2 + + # pd_op.transpose: (-1x49x12x32xf32) <- (-1x12x49x32xf32) + transpose_18 = paddle._C_ops.transpose(matmul_15, [0, 2, 1, 3]) + del matmul_15 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_19 = [slice_20, full_4, full_20] + del slice_20 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_18 = paddle._C_ops.stack(combine_19, 0) + del combine_19 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x49x12x32xf32, 3xi64) + reshape_40 = paddle._C_ops.reshape(transpose_18, stack_18) + del stack_18, transpose_18 + + # pd_op.matmul: (-1x49x384xf32) <- (-1x49x384xf32, 384x384xf32) + matmul_16 = paddle._C_ops.matmul(reshape_40, parameter_269, False, False) + del parameter_269, reshape_40 + + # pd_op.add: (-1x49x384xf32) <- (-1x49x384xf32, 384xf32) + add_19 = paddle._C_ops.add(matmul_16, parameter_268) + del matmul_16, parameter_268 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x49x384xf32, 4xi64) + reshape_41 = paddle._C_ops.reshape(add_19, full_int_array_33) + del add_19 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_35 = [-1, 4, 4, 7, 7, 384] + + # pd_op.reshape: (-1x4x4x7x7x384xf32) <- (-1x7x7x384xf32, 6xi64) + reshape_42 = paddle._C_ops.reshape(reshape_41, full_int_array_35) + del reshape_41 + + # pd_op.transpose: (-1x4x7x4x7x384xf32) <- (-1x4x4x7x7x384xf32) + transpose_19 = paddle._C_ops.transpose(reshape_42, [0, 1, 3, 2, 4, 5]) + del reshape_42 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_36 = [-1, 28, 28, 384] + + # pd_op.reshape: (-1x28x28x384xf32) <- (-1x4x7x4x7x384xf32, 4xi64) + reshape_43 = paddle._C_ops.reshape(transpose_19, full_int_array_36) + del transpose_19 + + # pd_op.full: (xi64) <- () + full_23 = paddle._C_ops.full( + [], float("784"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_20 = [slice_17, full_23, full_20] + del slice_17 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_19 = paddle._C_ops.stack(combine_20, 0) + del combine_20 + + # pd_op.reshape: (-1x784x384xf32) <- (-1x28x28x384xf32, 3xi64) + reshape_44 = paddle._C_ops.reshape(reshape_43, stack_19) + del reshape_43, stack_19 + + # pd_op.add: (-1x784x384xf32) <- (-1x-1x384xf32, -1x784x384xf32) + add_20 = paddle._C_ops.add(matmul_12, reshape_44) + del matmul_12, reshape_44 + + # pd_op.layer_norm: (-1x784x384xf32, -1x784xf32, -1x784xf32) <- (-1x784x384xf32, 384xf32, 384xf32) + layer_norm_21, layer_norm_22, layer_norm_23 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_20, parameter_267, parameter_266, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_266, parameter_267 + + # pd_op.matmul: (-1x784x1536xf32) <- (-1x784x384xf32, 384x1536xf32) + matmul_17 = paddle._C_ops.matmul(layer_norm_21, parameter_265, False, False) + del layer_norm_21, parameter_265 + + # pd_op.add: (-1x784x1536xf32) <- (-1x784x1536xf32, 1536xf32) + add_21 = paddle._C_ops.add(matmul_17, parameter_264) + del matmul_17, parameter_264 + + # pd_op.gelu: (-1x784x1536xf32) <- (-1x784x1536xf32) + gelu_2 = paddle._C_ops.gelu(add_21, False) + del add_21 + + # pd_op.matmul: (-1x784x384xf32) <- (-1x784x1536xf32, 1536x384xf32) + matmul_18 = paddle._C_ops.matmul(gelu_2, parameter_263, False, False) + del gelu_2, parameter_263 + + # pd_op.add: (-1x784x384xf32) <- (-1x784x384xf32, 384xf32) + add_22 = paddle._C_ops.add(matmul_18, parameter_262) + del matmul_18, parameter_262 + + # pd_op.add: (-1x784x384xf32) <- (-1x784x384xf32, -1x784x384xf32) + add_23 = paddle._C_ops.add(add_20, add_22) + del add_20, add_22 + + # pd_op.shape64: (3xi64) <- (-1x784x384xf32) + shape64_15 = paddle._C_ops.shape64(add_23) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_24 = paddle._C_ops.slice( + shape64_15, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_15 + + # pd_op.layer_norm: (-1x784x384xf32, -1x784xf32, -1x784xf32) <- (-1x784x384xf32, 384xf32, 384xf32) + layer_norm_24, layer_norm_25, layer_norm_26 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_23, parameter_261, parameter_260, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_260, parameter_261 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_21 = [slice_24, full_19, full_19, full_20] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_20 = paddle._C_ops.stack(combine_21, 0) + del combine_21 + + # pd_op.reshape: (-1x28x28x384xf32) <- (-1x784x384xf32, 4xi64) + reshape_45 = paddle._C_ops.reshape(layer_norm_24, stack_20) + del layer_norm_24, stack_20 + + # pd_op.shape64: (4xi64) <- (-1x28x28x384xf32) + shape64_16 = paddle._C_ops.shape64(reshape_45) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_25 = paddle._C_ops.slice( + shape64_16, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_16 + + # pd_op.roll: (-1x28x28x384xf32) <- (-1x28x28x384xf32, 2xi64) + roll_2 = paddle._C_ops.roll(reshape_45, full_int_array_11, [1, 2]) + del reshape_45 + + # pd_op.shape64: (4xi64) <- (-1x28x28x384xf32) + shape64_17 = paddle._C_ops.shape64(roll_2) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_26 = paddle._C_ops.slice( + shape64_17, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_17 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_22 = [slice_26, full_21, full_3, full_21, full_3, full_20] + del slice_26 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_21 = paddle._C_ops.stack(combine_22, 0) + del combine_22 + + # pd_op.reshape: (-1x4x7x4x7x384xf32) <- (-1x28x28x384xf32, 6xi64) + reshape_46 = paddle._C_ops.reshape(roll_2, stack_21) + del roll_2, stack_21 + + # pd_op.transpose: (-1x4x4x7x7x384xf32) <- (-1x4x7x4x7x384xf32) + transpose_20 = paddle._C_ops.transpose(reshape_46, [0, 1, 3, 2, 4, 5]) + del reshape_46 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x4x4x7x7x384xf32, 4xi64) + reshape_47 = paddle._C_ops.reshape(transpose_20, full_int_array_33) + del transpose_20 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x7x7x384xf32, 3xi64) + reshape_48 = paddle._C_ops.reshape(reshape_47, full_int_array_34) + del full_int_array_34, reshape_47 + + # pd_op.full: (1x28x28x1xf32) <- () + full_24 = paddle._C_ops.full( + [1, 28, 28, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__20 = paddle._C_ops.set_value_( + full_24, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_24 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__21 = paddle._C_ops.set_value_( + set_value__20, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__20 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__22 = paddle._C_ops.set_value_( + set_value__21, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__21 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__23 = paddle._C_ops.set_value_( + set_value__22, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__22 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__24 = paddle._C_ops.set_value_( + set_value__23, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__23 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__25 = paddle._C_ops.set_value_( + set_value__24, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__24 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__26 = paddle._C_ops.set_value_( + set_value__25, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__25 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__27 = paddle._C_ops.set_value_( + set_value__26, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__26 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__1 = paddle._C_ops.set_value_( + set_value__27, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__27 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_37 = [1, 4, 7, 4, 7, 1] + + # pd_op.reshape: (1x4x7x4x7x1xf32) <- (1x28x28x1xf32, 6xi64) + reshape_49 = paddle._C_ops.reshape(set_value__1, full_int_array_37) + del full_int_array_37 + + # pd_op.transpose: (1x4x4x7x7x1xf32) <- (1x4x7x4x7x1xf32) + transpose_21 = paddle._C_ops.transpose(reshape_49, [0, 1, 3, 2, 4, 5]) + del reshape_49 + + # pd_op.reshape: (16x7x7x1xf32) <- (1x4x4x7x7x1xf32, 4xi64) + reshape_50 = paddle._C_ops.reshape(transpose_21, full_int_array_27) + del transpose_21 + + # pd_op.reshape: (16x49xf32) <- (16x7x7x1xf32, 2xi64) + reshape_51 = paddle._C_ops.reshape(reshape_50, full_int_array_28) + del reshape_50 + + # pd_op.unsqueeze: (16x1x49xf32) <- (16x49xf32, 1xi64) + unsqueeze_7 = paddle._C_ops.unsqueeze(reshape_51, full_int_array_1) + + # pd_op.unsqueeze: (16x49x1xf32) <- (16x49xf32, 1xi64) + unsqueeze_8 = paddle._C_ops.unsqueeze(reshape_51, full_int_array_5) + del reshape_51 + + # pd_op.subtract: (16x49x49xf32) <- (16x1x49xf32, 16x49x1xf32) + subtract_1 = paddle._C_ops.subtract(unsqueeze_7, unsqueeze_8) + del unsqueeze_7, unsqueeze_8 + + # pd_op.not_equal: (16x49x49xb) <- (16x49x49xf32, xf32) + not_equal_1 = paddle._C_ops.not_equal(subtract_1, full_11) + + # pd_op.full: (16x49x49xf32) <- () + full_25 = paddle._C_ops.full( + [16, 49, 49], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (16x49x49xf32) <- (16x49x49xb, 16x49x49xf32, 16x49x49xf32) + where_2 = paddle._C_ops.where(not_equal_1, full_25, subtract_1) + del full_25, not_equal_1, subtract_1 + + # pd_op.equal: (16x49x49xb) <- (16x49x49xf32, xf32) + equal_1 = paddle._C_ops.equal(where_2, full_11) + + # pd_op.full: (16x49x49xf32) <- () + full_26 = paddle._C_ops.full( + [16, 49, 49], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (16x49x49xf32) <- (16x49x49xb, 16x49x49xf32, 16x49x49xf32) + where_3 = paddle._C_ops.where(equal_1, full_26, where_2) + del equal_1, full_26, where_2 + + # pd_op.shape64: (3xi64) <- (-1x49x384xf32) + shape64_18 = paddle._C_ops.shape64(reshape_48) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_27 = paddle._C_ops.slice( + shape64_18, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_18 + + # pd_op.matmul: (-1x49x1152xf32) <- (-1x49x384xf32, 384x1152xf32) + matmul_19 = paddle._C_ops.matmul(reshape_48, parameter_259, False, False) + del parameter_259, reshape_48 + + # pd_op.add: (-1x49x1152xf32) <- (-1x49x1152xf32, 1152xf32) + add_24 = paddle._C_ops.add(matmul_19, parameter_258) + del matmul_19, parameter_258 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_23 = [slice_27, full_4, full_5, full_22, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_22 = paddle._C_ops.stack(combine_23, 0) + del combine_23 + + # pd_op.reshape: (-1x49x3x12x32xf32) <- (-1x49x1152xf32, 5xi64) + reshape_52 = paddle._C_ops.reshape(add_24, stack_22) + del add_24, stack_22 + + # pd_op.transpose: (3x-1x12x49x32xf32) <- (-1x49x3x12x32xf32) + transpose_22 = paddle._C_ops.transpose(reshape_52, [2, 0, 3, 1, 4]) + del reshape_52 + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_28 = paddle._C_ops.slice( + transpose_22, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_29 = paddle._C_ops.slice( + transpose_22, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_30 = paddle._C_ops.slice( + transpose_22, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_22 + + # pd_op.scale: (-1x12x49x32xf32) <- (-1x12x49x32xf32, 1xf32) + scale_3 = paddle._C_ops.scale(slice_28, full_8, float("0"), True) + del slice_28 + + # pd_op.transpose: (-1x12x32x49xf32) <- (-1x12x49x32xf32) + transpose_23 = paddle._C_ops.transpose(slice_29, [0, 1, 3, 2]) + del slice_29 + + # pd_op.matmul: (-1x12x49x49xf32) <- (-1x12x49x32xf32, -1x12x32x49xf32) + matmul_20 = paddle._C_ops.matmul(scale_3, transpose_23, False, False) + del scale_3, transpose_23 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_53 = paddle._C_ops.reshape(data_7, full_int_array_7) + del data_7 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_3 = paddle._C_ops.index_select(data_8, reshape_53, 0) + del data_8, reshape_53 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_54 = paddle._C_ops.reshape(index_select_3, full_int_array_8) + del index_select_3 + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_24 = paddle._C_ops.transpose(reshape_54, [2, 0, 1]) + del reshape_54 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_9 = paddle._C_ops.unsqueeze(transpose_24, full_int_array_0) + del transpose_24 + + # pd_op.add: (-1x12x49x49xf32) <- (-1x12x49x49xf32, 1x12x49x49xf32) + add_25 = paddle._C_ops.add(matmul_20, unsqueeze_9) + del matmul_20, unsqueeze_9 + + # pd_op.full: (xi64) <- () + full_27 = paddle._C_ops.full( + [], float("16"), paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_1 = paddle._C_ops.floor_divide(slice_27, full_27) + del full_27 + + # pd_op.full: (xi64) <- () + full_28 = paddle._C_ops.full( + [], float("16"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_24 = [floor_divide_1, full_28, full_22, full_4, full_4] + del floor_divide_1, full_28 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_23 = paddle._C_ops.stack(combine_24, 0) + del combine_24 + + # pd_op.reshape: (-1x16x12x49x49xf32) <- (-1x12x49x49xf32, 5xi64) + reshape_55 = paddle._C_ops.reshape(add_25, stack_23) + del add_25, stack_23 + + # pd_op.unsqueeze: (16x1x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_10 = paddle._C_ops.unsqueeze(where_3, full_int_array_1) + del where_3 + + # pd_op.unsqueeze: (1x16x1x49x49xf32) <- (16x1x49x49xf32, 1xi64) + unsqueeze_11 = paddle._C_ops.unsqueeze(unsqueeze_10, full_int_array_0) + del unsqueeze_10 + + # pd_op.add: (-1x16x12x49x49xf32) <- (-1x16x12x49x49xf32, 1x16x1x49x49xf32) + add_26 = paddle._C_ops.add(reshape_55, unsqueeze_11) + del reshape_55, unsqueeze_11 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_25 = [slice_27, full_22, full_4, full_4] + del full_22 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_24 = paddle._C_ops.stack(combine_25, 0) + del combine_25 + + # pd_op.reshape: (-1x12x49x49xf32) <- (-1x16x12x49x49xf32, 4xi64) + reshape_56 = paddle._C_ops.reshape(add_26, stack_24) + del add_26, stack_24 + + # pd_op.softmax: (-1x12x49x49xf32) <- (-1x12x49x49xf32) + softmax_3 = paddle._C_ops.softmax(reshape_56, -1) + del reshape_56 + + # pd_op.matmul: (-1x12x49x32xf32) <- (-1x12x49x49xf32, -1x12x49x32xf32) + matmul_21 = paddle._C_ops.matmul(softmax_3, slice_30, False, False) + del slice_30, softmax_3 + + # pd_op.transpose: (-1x49x12x32xf32) <- (-1x12x49x32xf32) + transpose_25 = paddle._C_ops.transpose(matmul_21, [0, 2, 1, 3]) + del matmul_21 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_26 = [slice_27, full_4, full_20] + del slice_27 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_25 = paddle._C_ops.stack(combine_26, 0) + del combine_26 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x49x12x32xf32, 3xi64) + reshape_57 = paddle._C_ops.reshape(transpose_25, stack_25) + del stack_25, transpose_25 + + # pd_op.matmul: (-1x49x384xf32) <- (-1x49x384xf32, 384x384xf32) + matmul_22 = paddle._C_ops.matmul(reshape_57, parameter_257, False, False) + del parameter_257, reshape_57 + + # pd_op.add: (-1x49x384xf32) <- (-1x49x384xf32, 384xf32) + add_27 = paddle._C_ops.add(matmul_22, parameter_256) + del matmul_22, parameter_256 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x49x384xf32, 4xi64) + reshape_58 = paddle._C_ops.reshape(add_27, full_int_array_33) + del add_27, full_int_array_33 + + # pd_op.reshape: (-1x4x4x7x7x384xf32) <- (-1x7x7x384xf32, 6xi64) + reshape_59 = paddle._C_ops.reshape(reshape_58, full_int_array_35) + del full_int_array_35, reshape_58 + + # pd_op.transpose: (-1x4x7x4x7x384xf32) <- (-1x4x4x7x7x384xf32) + transpose_26 = paddle._C_ops.transpose(reshape_59, [0, 1, 3, 2, 4, 5]) + del reshape_59 + + # pd_op.reshape: (-1x28x28x384xf32) <- (-1x4x7x4x7x384xf32, 4xi64) + reshape_60 = paddle._C_ops.reshape(transpose_26, full_int_array_36) + del full_int_array_36, transpose_26 + + # pd_op.roll: (-1x28x28x384xf32) <- (-1x28x28x384xf32, 2xi64) + roll_3 = paddle._C_ops.roll(reshape_60, full_int_array_29, [1, 2]) + del reshape_60 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_27 = [slice_24, full_23, full_20] + del full_23, slice_24 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_26 = paddle._C_ops.stack(combine_27, 0) + del combine_27 + + # pd_op.reshape: (-1x784x384xf32) <- (-1x28x28x384xf32, 3xi64) + reshape_61 = paddle._C_ops.reshape(roll_3, stack_26) + del roll_3, stack_26 + + # pd_op.add: (-1x784x384xf32) <- (-1x784x384xf32, -1x784x384xf32) + add_28 = paddle._C_ops.add(add_23, reshape_61) + del add_23, reshape_61 + + # pd_op.layer_norm: (-1x784x384xf32, -1x784xf32, -1x784xf32) <- (-1x784x384xf32, 384xf32, 384xf32) + layer_norm_27, layer_norm_28, layer_norm_29 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_28, parameter_255, parameter_254, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_254, parameter_255 + + # pd_op.matmul: (-1x784x1536xf32) <- (-1x784x384xf32, 384x1536xf32) + matmul_23 = paddle._C_ops.matmul(layer_norm_27, parameter_253, False, False) + del layer_norm_27, parameter_253 + + # pd_op.add: (-1x784x1536xf32) <- (-1x784x1536xf32, 1536xf32) + add_29 = paddle._C_ops.add(matmul_23, parameter_252) + del matmul_23, parameter_252 + + # pd_op.gelu: (-1x784x1536xf32) <- (-1x784x1536xf32) + gelu_3 = paddle._C_ops.gelu(add_29, False) + del add_29 + + # pd_op.matmul: (-1x784x384xf32) <- (-1x784x1536xf32, 1536x384xf32) + matmul_24 = paddle._C_ops.matmul(gelu_3, parameter_251, False, False) + del gelu_3, parameter_251 + + # pd_op.add: (-1x784x384xf32) <- (-1x784x384xf32, 384xf32) + add_30 = paddle._C_ops.add(matmul_24, parameter_250) + del matmul_24, parameter_250 + + # pd_op.add: (-1x784x384xf32) <- (-1x784x384xf32, -1x784x384xf32) + add_31 = paddle._C_ops.add(add_28, add_30) + del add_28, add_30 + + # pd_op.shape64: (3xi64) <- (-1x784x384xf32) + shape64_19 = paddle._C_ops.shape64(add_31) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_31 = paddle._C_ops.slice( + shape64_19, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_19 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_28 = [slice_31, full_19, full_19, full_20] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_27 = paddle._C_ops.stack(combine_28, 0) + del combine_28 + + # pd_op.reshape: (-1x28x28x384xf32) <- (-1x784x384xf32, 4xi64) + reshape_62 = paddle._C_ops.reshape(add_31, stack_27) + del add_31, stack_27 + + # pd_op.strided_slice: (-1x14x14x384xf32) <- (-1x28x28x384xf32, 2xi64, 2xi64, 2xi64) + strided_slice_4 = paddle._C_ops.strided_slice( + reshape_62, [1, 2], full_int_array_12, full_int_array_25, full_int_array_30 + ) + + # pd_op.strided_slice: (-1x14x14x384xf32) <- (-1x28x28x384xf32, 2xi64, 2xi64, 2xi64) + strided_slice_5 = paddle._C_ops.strided_slice( + reshape_62, [1, 2], full_int_array_31, full_int_array_25, full_int_array_30 + ) + + # pd_op.strided_slice: (-1x14x14x384xf32) <- (-1x28x28x384xf32, 2xi64, 2xi64, 2xi64) + strided_slice_6 = paddle._C_ops.strided_slice( + reshape_62, [1, 2], full_int_array_32, full_int_array_25, full_int_array_30 + ) + + # pd_op.strided_slice: (-1x14x14x384xf32) <- (-1x28x28x384xf32, 2xi64, 2xi64, 2xi64) + strided_slice_7 = paddle._C_ops.strided_slice( + reshape_62, [1, 2], full_int_array_14, full_int_array_25, full_int_array_30 + ) + + # pd_op.shape64: (4xi64) <- (-1x28x28x384xf32) + shape64_20 = paddle._C_ops.shape64(reshape_62) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_32 = paddle._C_ops.slice( + shape64_20, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_20 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_29 = [slice_32, full_19, full_19, full_20] + del full_19, full_20, slice_32 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_28 = paddle._C_ops.stack(combine_29, 0) + del combine_29 + + # pd_op.reshape: (-1x28x28x384xf32) <- (-1x28x28x384xf32, 4xi64) + reshape_63 = paddle._C_ops.reshape(reshape_62, stack_28) + del reshape_62, stack_28 + + # builtin.combine: ([-1x14x14x384xf32, -1x14x14x384xf32, -1x14x14x384xf32, -1x14x14x384xf32]) <- (-1x14x14x384xf32, -1x14x14x384xf32, -1x14x14x384xf32, -1x14x14x384xf32) + combine_30 = [ + strided_slice_4, + strided_slice_5, + strided_slice_6, + strided_slice_7, + ] + del strided_slice_4, strided_slice_5, strided_slice_6, strided_slice_7 + + # pd_op.concat: (-1x14x14x1536xf32) <- ([-1x14x14x384xf32, -1x14x14x384xf32, -1x14x14x384xf32, -1x14x14x384xf32], 1xi32) + concat_1 = paddle._C_ops.concat(combine_30, full_16) + del combine_30 + + # pd_op.full: (xi64) <- () + full_29 = paddle._C_ops.full( + [], float("1536"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_31 = [slice_31, full_17, full_29] + del slice_31 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_29 = paddle._C_ops.stack(combine_31, 0) + del combine_31 + + # pd_op.reshape: (-1x-1x1536xf32) <- (-1x14x14x1536xf32, 3xi64) + reshape_64 = paddle._C_ops.reshape(concat_1, stack_29) + del concat_1, stack_29 + + # pd_op.layer_norm: (-1x-1x1536xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x1536xf32, 1536xf32, 1536xf32) + layer_norm_30, layer_norm_31, layer_norm_32 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + reshape_64, parameter_249, parameter_248, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_248, parameter_249, reshape_64 + + # pd_op.matmul: (-1x-1x768xf32) <- (-1x-1x1536xf32, 1536x768xf32) + matmul_25 = paddle._C_ops.matmul(layer_norm_30, parameter_247, False, False) + del layer_norm_30, parameter_247 + + # pd_op.shape64: (3xi64) <- (-1x-1x768xf32) + shape64_21 = paddle._C_ops.shape64(matmul_25) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_33 = paddle._C_ops.slice( + shape64_21, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_21 + + # pd_op.shape64: (3xi64) <- (-1x-1x768xf32) + shape64_22 = paddle._C_ops.shape64(matmul_25) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_34 = paddle._C_ops.slice( + shape64_22, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + del shape64_22 + + # pd_op.layer_norm: (-1x-1x768xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x768xf32, 768xf32, 768xf32) + layer_norm_33, layer_norm_34, layer_norm_35 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + matmul_25, parameter_246, parameter_245, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_245, parameter_246 + + # pd_op.full: (xi64) <- () + full_30 = paddle._C_ops.full( + [], float("14"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_32 = [slice_33, full_30, full_30, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_30 = paddle._C_ops.stack(combine_32, 0) + del combine_32 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x-1x768xf32, 4xi64) + reshape_65 = paddle._C_ops.reshape(layer_norm_33, stack_30) + del layer_norm_33, stack_30 + + # pd_op.shape64: (4xi64) <- (-1x14x14x768xf32) + shape64_23 = paddle._C_ops.shape64(reshape_65) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_35 = paddle._C_ops.slice( + shape64_23, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_23 + + # pd_op.full: (xi64) <- () + full_31 = paddle._C_ops.full( + [], float("2"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_33 = [slice_35, full_31, full_3, full_31, full_3, full_18] + del slice_35 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_31 = paddle._C_ops.stack(combine_33, 0) + del combine_33 + + # pd_op.reshape: (-1x2x7x2x7x768xf32) <- (-1x14x14x768xf32, 6xi64) + reshape_66 = paddle._C_ops.reshape(reshape_65, stack_31) + del reshape_65, stack_31 + + # pd_op.transpose: (-1x2x2x7x7x768xf32) <- (-1x2x7x2x7x768xf32) + transpose_27 = paddle._C_ops.transpose(reshape_66, [0, 1, 3, 2, 4, 5]) + del reshape_66 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_38 = [-1, 7, 7, 768] + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x2x2x7x7x768xf32, 4xi64) + reshape_67 = paddle._C_ops.reshape(transpose_27, full_int_array_38) + del transpose_27 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_39 = [-1, 49, 768] + + # pd_op.reshape: (-1x49x768xf32) <- (-1x7x7x768xf32, 3xi64) + reshape_68 = paddle._C_ops.reshape(reshape_67, full_int_array_39) + del reshape_67 + + # pd_op.shape64: (3xi64) <- (-1x49x768xf32) + shape64_24 = paddle._C_ops.shape64(reshape_68) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_36 = paddle._C_ops.slice( + shape64_24, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_24 + + # pd_op.matmul: (-1x49x2304xf32) <- (-1x49x768xf32, 768x2304xf32) + matmul_26 = paddle._C_ops.matmul(reshape_68, parameter_244, False, False) + del parameter_244, reshape_68 + + # pd_op.add: (-1x49x2304xf32) <- (-1x49x2304xf32, 2304xf32) + add_32 = paddle._C_ops.add(matmul_26, parameter_243) + del matmul_26, parameter_243 + + # pd_op.full: (xi64) <- () + full_32 = paddle._C_ops.full( + [], float("24"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_34 = [slice_36, full_4, full_5, full_32, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_32 = paddle._C_ops.stack(combine_34, 0) + del combine_34 + + # pd_op.reshape: (-1x49x3x24x32xf32) <- (-1x49x2304xf32, 5xi64) + reshape_69 = paddle._C_ops.reshape(add_32, stack_32) + del add_32, stack_32 + + # pd_op.transpose: (3x-1x24x49x32xf32) <- (-1x49x3x24x32xf32) + transpose_28 = paddle._C_ops.transpose(reshape_69, [2, 0, 3, 1, 4]) + del reshape_69 + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_37 = paddle._C_ops.slice( + transpose_28, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_38 = paddle._C_ops.slice( + transpose_28, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_39 = paddle._C_ops.slice( + transpose_28, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_28 + + # pd_op.scale: (-1x24x49x32xf32) <- (-1x24x49x32xf32, 1xf32) + scale_4 = paddle._C_ops.scale(slice_37, full_8, float("0"), True) + del slice_37 + + # pd_op.transpose: (-1x24x32x49xf32) <- (-1x24x49x32xf32) + transpose_29 = paddle._C_ops.transpose(slice_38, [0, 1, 3, 2]) + del slice_38 + + # pd_op.matmul: (-1x24x49x49xf32) <- (-1x24x49x32xf32, -1x24x32x49xf32) + matmul_27 = paddle._C_ops.matmul(scale_4, transpose_29, False, False) + del scale_4, transpose_29 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_70 = paddle._C_ops.reshape(data_9, full_int_array_7) + del data_9 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_4 = paddle._C_ops.index_select(data_10, reshape_70, 0) + del data_10, reshape_70 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_71 = paddle._C_ops.reshape(index_select_4, full_int_array_8) + del index_select_4 + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_30 = paddle._C_ops.transpose(reshape_71, [2, 0, 1]) + del reshape_71 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_12 = paddle._C_ops.unsqueeze(transpose_30, full_int_array_0) + del transpose_30 + + # pd_op.add: (-1x24x49x49xf32) <- (-1x24x49x49xf32, 1x24x49x49xf32) + add_33 = paddle._C_ops.add(matmul_27, unsqueeze_12) + del matmul_27, unsqueeze_12 + + # pd_op.softmax: (-1x24x49x49xf32) <- (-1x24x49x49xf32) + softmax_4 = paddle._C_ops.softmax(add_33, -1) + del add_33 + + # pd_op.matmul: (-1x24x49x32xf32) <- (-1x24x49x49xf32, -1x24x49x32xf32) + matmul_28 = paddle._C_ops.matmul(softmax_4, slice_39, False, False) + del slice_39, softmax_4 + + # pd_op.transpose: (-1x49x24x32xf32) <- (-1x24x49x32xf32) + transpose_31 = paddle._C_ops.transpose(matmul_28, [0, 2, 1, 3]) + del matmul_28 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_35 = [slice_36, full_4, full_18] + del slice_36 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_33 = paddle._C_ops.stack(combine_35, 0) + del combine_35 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x49x24x32xf32, 3xi64) + reshape_72 = paddle._C_ops.reshape(transpose_31, stack_33) + del stack_33, transpose_31 + + # pd_op.matmul: (-1x49x768xf32) <- (-1x49x768xf32, 768x768xf32) + matmul_29 = paddle._C_ops.matmul(reshape_72, parameter_242, False, False) + del parameter_242, reshape_72 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, 768xf32) + add_34 = paddle._C_ops.add(matmul_29, parameter_241) + del matmul_29, parameter_241 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x49x768xf32, 4xi64) + reshape_73 = paddle._C_ops.reshape(add_34, full_int_array_38) + del add_34 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_40 = [-1, 2, 2, 7, 7, 768] + + # pd_op.reshape: (-1x2x2x7x7x768xf32) <- (-1x7x7x768xf32, 6xi64) + reshape_74 = paddle._C_ops.reshape(reshape_73, full_int_array_40) + del reshape_73 + + # pd_op.transpose: (-1x2x7x2x7x768xf32) <- (-1x2x2x7x7x768xf32) + transpose_32 = paddle._C_ops.transpose(reshape_74, [0, 1, 3, 2, 4, 5]) + del reshape_74 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_41 = [-1, 14, 14, 768] + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x2x7x2x7x768xf32, 4xi64) + reshape_75 = paddle._C_ops.reshape(transpose_32, full_int_array_41) + del transpose_32 + + # pd_op.full: (xi64) <- () + full_33 = paddle._C_ops.full( + [], float("196"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_36 = [slice_33, full_33, full_18] + del slice_33 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_34 = paddle._C_ops.stack(combine_36, 0) + del combine_36 + + # pd_op.reshape: (-1x196x768xf32) <- (-1x14x14x768xf32, 3xi64) + reshape_76 = paddle._C_ops.reshape(reshape_75, stack_34) + del reshape_75, stack_34 + + # pd_op.add: (-1x196x768xf32) <- (-1x-1x768xf32, -1x196x768xf32) + add_35 = paddle._C_ops.add(matmul_25, reshape_76) + del matmul_25, reshape_76 + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_36, layer_norm_37, layer_norm_38 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_35, parameter_240, parameter_239, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_239, parameter_240 + + # pd_op.matmul: (-1x196x3072xf32) <- (-1x196x768xf32, 768x3072xf32) + matmul_30 = paddle._C_ops.matmul(layer_norm_36, parameter_238, False, False) + del layer_norm_36, parameter_238 + + # pd_op.add: (-1x196x3072xf32) <- (-1x196x3072xf32, 3072xf32) + add_36 = paddle._C_ops.add(matmul_30, parameter_237) + del matmul_30, parameter_237 + + # pd_op.gelu: (-1x196x3072xf32) <- (-1x196x3072xf32) + gelu_4 = paddle._C_ops.gelu(add_36, False) + del add_36 + + # pd_op.matmul: (-1x196x768xf32) <- (-1x196x3072xf32, 3072x768xf32) + matmul_31 = paddle._C_ops.matmul(gelu_4, parameter_236, False, False) + del gelu_4, parameter_236 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, 768xf32) + add_37 = paddle._C_ops.add(matmul_31, parameter_235) + del matmul_31, parameter_235 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_38 = paddle._C_ops.add(add_35, add_37) + del add_35, add_37 + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_25 = paddle._C_ops.shape64(add_38) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_40 = paddle._C_ops.slice( + shape64_25, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_25 + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_39, layer_norm_40, layer_norm_41 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_38, parameter_234, parameter_233, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_233, parameter_234 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_37 = [slice_40, full_30, full_30, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_35 = paddle._C_ops.stack(combine_37, 0) + del combine_37 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x196x768xf32, 4xi64) + reshape_77 = paddle._C_ops.reshape(layer_norm_39, stack_35) + del layer_norm_39, stack_35 + + # pd_op.shape64: (4xi64) <- (-1x14x14x768xf32) + shape64_26 = paddle._C_ops.shape64(reshape_77) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_41 = paddle._C_ops.slice( + shape64_26, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_26 + + # pd_op.roll: (-1x14x14x768xf32) <- (-1x14x14x768xf32, 2xi64) + roll_4 = paddle._C_ops.roll(reshape_77, full_int_array_11, [1, 2]) + del reshape_77 + + # pd_op.shape64: (4xi64) <- (-1x14x14x768xf32) + shape64_27 = paddle._C_ops.shape64(roll_4) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_42 = paddle._C_ops.slice( + shape64_27, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_27 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_38 = [slice_42, full_31, full_3, full_31, full_3, full_18] + del slice_42 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_36 = paddle._C_ops.stack(combine_38, 0) + del combine_38 + + # pd_op.reshape: (-1x2x7x2x7x768xf32) <- (-1x14x14x768xf32, 6xi64) + reshape_78 = paddle._C_ops.reshape(roll_4, stack_36) + del roll_4, stack_36 + + # pd_op.transpose: (-1x2x2x7x7x768xf32) <- (-1x2x7x2x7x768xf32) + transpose_33 = paddle._C_ops.transpose(reshape_78, [0, 1, 3, 2, 4, 5]) + del reshape_78 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x2x2x7x7x768xf32, 4xi64) + reshape_79 = paddle._C_ops.reshape(transpose_33, full_int_array_38) + del transpose_33 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x7x7x768xf32, 3xi64) + reshape_80 = paddle._C_ops.reshape(reshape_79, full_int_array_39) + del reshape_79 + + # pd_op.full: (1x14x14x1xf32) <- () + full_34 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__28 = paddle._C_ops.set_value_( + full_34, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_34 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__29 = paddle._C_ops.set_value_( + set_value__28, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__28 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__30 = paddle._C_ops.set_value_( + set_value__29, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__29 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__31 = paddle._C_ops.set_value_( + set_value__30, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__30 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__32 = paddle._C_ops.set_value_( + set_value__31, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__31 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__33 = paddle._C_ops.set_value_( + set_value__32, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__32 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__34 = paddle._C_ops.set_value_( + set_value__33, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__33 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__35 = paddle._C_ops.set_value_( + set_value__34, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__34 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__2 = paddle._C_ops.set_value_( + set_value__35, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__35 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_42 = [1, 2, 7, 2, 7, 1] + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_81 = paddle._C_ops.reshape(set_value__2, full_int_array_42) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_34 = paddle._C_ops.transpose(reshape_81, [0, 1, 3, 2, 4, 5]) + del reshape_81 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_82 = paddle._C_ops.reshape(transpose_34, full_int_array_27) + del transpose_34 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_83 = paddle._C_ops.reshape(reshape_82, full_int_array_28) + del reshape_82 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_13 = paddle._C_ops.unsqueeze(reshape_83, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_14 = paddle._C_ops.unsqueeze(reshape_83, full_int_array_5) + del reshape_83 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_2 = paddle._C_ops.subtract(unsqueeze_13, unsqueeze_14) + del unsqueeze_13, unsqueeze_14 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_2 = paddle._C_ops.not_equal(subtract_2, full_11) + + # pd_op.full: (4x49x49xf32) <- () + full_35 = paddle._C_ops.full( + [4, 49, 49], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_4 = paddle._C_ops.where(not_equal_2, full_35, subtract_2) + del not_equal_2, subtract_2 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_2 = paddle._C_ops.equal(where_4, full_11) + + # pd_op.full: (4x49x49xf32) <- () + full_36 = paddle._C_ops.full( + [4, 49, 49], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_5 = paddle._C_ops.where(equal_2, full_36, where_4) + del equal_2, where_4 + + # pd_op.shape64: (3xi64) <- (-1x49x768xf32) + shape64_28 = paddle._C_ops.shape64(reshape_80) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_43 = paddle._C_ops.slice( + shape64_28, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_28 + + # pd_op.matmul: (-1x49x2304xf32) <- (-1x49x768xf32, 768x2304xf32) + matmul_32 = paddle._C_ops.matmul(reshape_80, parameter_232, False, False) + del parameter_232, reshape_80 + + # pd_op.add: (-1x49x2304xf32) <- (-1x49x2304xf32, 2304xf32) + add_39 = paddle._C_ops.add(matmul_32, parameter_231) + del matmul_32, parameter_231 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_39 = [slice_43, full_4, full_5, full_32, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_37 = paddle._C_ops.stack(combine_39, 0) + del combine_39 + + # pd_op.reshape: (-1x49x3x24x32xf32) <- (-1x49x2304xf32, 5xi64) + reshape_84 = paddle._C_ops.reshape(add_39, stack_37) + del add_39, stack_37 + + # pd_op.transpose: (3x-1x24x49x32xf32) <- (-1x49x3x24x32xf32) + transpose_35 = paddle._C_ops.transpose(reshape_84, [2, 0, 3, 1, 4]) + del reshape_84 + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_44 = paddle._C_ops.slice( + transpose_35, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_45 = paddle._C_ops.slice( + transpose_35, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_46 = paddle._C_ops.slice( + transpose_35, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_35 + + # pd_op.scale: (-1x24x49x32xf32) <- (-1x24x49x32xf32, 1xf32) + scale_5 = paddle._C_ops.scale(slice_44, full_8, float("0"), True) + del slice_44 + + # pd_op.transpose: (-1x24x32x49xf32) <- (-1x24x49x32xf32) + transpose_36 = paddle._C_ops.transpose(slice_45, [0, 1, 3, 2]) + del slice_45 + + # pd_op.matmul: (-1x24x49x49xf32) <- (-1x24x49x32xf32, -1x24x32x49xf32) + matmul_33 = paddle._C_ops.matmul(scale_5, transpose_36, False, False) + del scale_5, transpose_36 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_85 = paddle._C_ops.reshape(data_11, full_int_array_7) + del data_11 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_5 = paddle._C_ops.index_select(data_12, reshape_85, 0) + del data_12, reshape_85 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_86 = paddle._C_ops.reshape(index_select_5, full_int_array_8) + del index_select_5 + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_37 = paddle._C_ops.transpose(reshape_86, [2, 0, 1]) + del reshape_86 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_15 = paddle._C_ops.unsqueeze(transpose_37, full_int_array_0) + del transpose_37 + + # pd_op.add: (-1x24x49x49xf32) <- (-1x24x49x49xf32, 1x24x49x49xf32) + add_40 = paddle._C_ops.add(matmul_33, unsqueeze_15) + del matmul_33, unsqueeze_15 + + # pd_op.full: (xi64) <- () + full_37 = paddle._C_ops.full( + [], float("4"), paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_2 = paddle._C_ops.floor_divide(slice_43, full_37) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_40 = [floor_divide_2, full_21, full_32, full_4, full_4] + del floor_divide_2 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_38 = paddle._C_ops.stack(combine_40, 0) + del combine_40 + + # pd_op.reshape: (-1x4x24x49x49xf32) <- (-1x24x49x49xf32, 5xi64) + reshape_87 = paddle._C_ops.reshape(add_40, stack_38) + del add_40, stack_38 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_16 = paddle._C_ops.unsqueeze(where_5, full_int_array_1) + del where_5 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_17 = paddle._C_ops.unsqueeze(unsqueeze_16, full_int_array_0) + del unsqueeze_16 + + # pd_op.add: (-1x4x24x49x49xf32) <- (-1x4x24x49x49xf32, 1x4x1x49x49xf32) + add_41 = paddle._C_ops.add(reshape_87, unsqueeze_17) + del reshape_87, unsqueeze_17 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_41 = [slice_43, full_32, full_4, full_4] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_39 = paddle._C_ops.stack(combine_41, 0) + del combine_41 + + # pd_op.reshape: (-1x24x49x49xf32) <- (-1x4x24x49x49xf32, 4xi64) + reshape_88 = paddle._C_ops.reshape(add_41, stack_39) + del add_41, stack_39 + + # pd_op.softmax: (-1x24x49x49xf32) <- (-1x24x49x49xf32) + softmax_5 = paddle._C_ops.softmax(reshape_88, -1) + del reshape_88 + + # pd_op.matmul: (-1x24x49x32xf32) <- (-1x24x49x49xf32, -1x24x49x32xf32) + matmul_34 = paddle._C_ops.matmul(softmax_5, slice_46, False, False) + del slice_46, softmax_5 + + # pd_op.transpose: (-1x49x24x32xf32) <- (-1x24x49x32xf32) + transpose_38 = paddle._C_ops.transpose(matmul_34, [0, 2, 1, 3]) + del matmul_34 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_42 = [slice_43, full_4, full_18] + del slice_43 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_40 = paddle._C_ops.stack(combine_42, 0) + del combine_42 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x49x24x32xf32, 3xi64) + reshape_89 = paddle._C_ops.reshape(transpose_38, stack_40) + del stack_40, transpose_38 + + # pd_op.matmul: (-1x49x768xf32) <- (-1x49x768xf32, 768x768xf32) + matmul_35 = paddle._C_ops.matmul(reshape_89, parameter_230, False, False) + del parameter_230, reshape_89 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, 768xf32) + add_42 = paddle._C_ops.add(matmul_35, parameter_229) + del matmul_35, parameter_229 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x49x768xf32, 4xi64) + reshape_90 = paddle._C_ops.reshape(add_42, full_int_array_38) + del add_42 + + # pd_op.reshape: (-1x2x2x7x7x768xf32) <- (-1x7x7x768xf32, 6xi64) + reshape_91 = paddle._C_ops.reshape(reshape_90, full_int_array_40) + del reshape_90 + + # pd_op.transpose: (-1x2x7x2x7x768xf32) <- (-1x2x2x7x7x768xf32) + transpose_39 = paddle._C_ops.transpose(reshape_91, [0, 1, 3, 2, 4, 5]) + del reshape_91 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x2x7x2x7x768xf32, 4xi64) + reshape_92 = paddle._C_ops.reshape(transpose_39, full_int_array_41) + del transpose_39 + + # pd_op.roll: (-1x14x14x768xf32) <- (-1x14x14x768xf32, 2xi64) + roll_5 = paddle._C_ops.roll(reshape_92, full_int_array_29, [1, 2]) + del reshape_92 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_43 = [slice_40, full_33, full_18] + del slice_40 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_41 = paddle._C_ops.stack(combine_43, 0) + del combine_43 + + # pd_op.reshape: (-1x196x768xf32) <- (-1x14x14x768xf32, 3xi64) + reshape_93 = paddle._C_ops.reshape(roll_5, stack_41) + del roll_5, stack_41 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_43 = paddle._C_ops.add(add_38, reshape_93) + del add_38, reshape_93 + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_42, layer_norm_43, layer_norm_44 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_43, parameter_228, parameter_227, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_227, parameter_228 + + # pd_op.matmul: (-1x196x3072xf32) <- (-1x196x768xf32, 768x3072xf32) + matmul_36 = paddle._C_ops.matmul(layer_norm_42, parameter_226, False, False) + del layer_norm_42, parameter_226 + + # pd_op.add: (-1x196x3072xf32) <- (-1x196x3072xf32, 3072xf32) + add_44 = paddle._C_ops.add(matmul_36, parameter_225) + del matmul_36, parameter_225 + + # pd_op.gelu: (-1x196x3072xf32) <- (-1x196x3072xf32) + gelu_5 = paddle._C_ops.gelu(add_44, False) + del add_44 + + # pd_op.matmul: (-1x196x768xf32) <- (-1x196x3072xf32, 3072x768xf32) + matmul_37 = paddle._C_ops.matmul(gelu_5, parameter_224, False, False) + del gelu_5, parameter_224 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, 768xf32) + add_45 = paddle._C_ops.add(matmul_37, parameter_223) + del matmul_37, parameter_223 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_46 = paddle._C_ops.add(add_43, add_45) + del add_43, add_45 + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_29 = paddle._C_ops.shape64(add_46) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_47 = paddle._C_ops.slice( + shape64_29, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_29 + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_45, layer_norm_46, layer_norm_47 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_46, parameter_222, parameter_221, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_221, parameter_222 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_44 = [slice_47, full_30, full_30, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_42 = paddle._C_ops.stack(combine_44, 0) + del combine_44 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x196x768xf32, 4xi64) + reshape_94 = paddle._C_ops.reshape(layer_norm_45, stack_42) + del layer_norm_45, stack_42 + + # pd_op.shape64: (4xi64) <- (-1x14x14x768xf32) + shape64_30 = paddle._C_ops.shape64(reshape_94) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_48 = paddle._C_ops.slice( + shape64_30, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_30 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_45 = [slice_48, full_31, full_3, full_31, full_3, full_18] + del slice_48 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_43 = paddle._C_ops.stack(combine_45, 0) + del combine_45 + + # pd_op.reshape: (-1x2x7x2x7x768xf32) <- (-1x14x14x768xf32, 6xi64) + reshape_95 = paddle._C_ops.reshape(reshape_94, stack_43) + del reshape_94, stack_43 + + # pd_op.transpose: (-1x2x2x7x7x768xf32) <- (-1x2x7x2x7x768xf32) + transpose_40 = paddle._C_ops.transpose(reshape_95, [0, 1, 3, 2, 4, 5]) + del reshape_95 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x2x2x7x7x768xf32, 4xi64) + reshape_96 = paddle._C_ops.reshape(transpose_40, full_int_array_38) + del transpose_40 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x7x7x768xf32, 3xi64) + reshape_97 = paddle._C_ops.reshape(reshape_96, full_int_array_39) + del reshape_96 + + # pd_op.shape64: (3xi64) <- (-1x49x768xf32) + shape64_31 = paddle._C_ops.shape64(reshape_97) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_49 = paddle._C_ops.slice( + shape64_31, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_31 + + # pd_op.matmul: (-1x49x2304xf32) <- (-1x49x768xf32, 768x2304xf32) + matmul_38 = paddle._C_ops.matmul(reshape_97, parameter_220, False, False) + del parameter_220, reshape_97 + + # pd_op.add: (-1x49x2304xf32) <- (-1x49x2304xf32, 2304xf32) + add_47 = paddle._C_ops.add(matmul_38, parameter_219) + del matmul_38, parameter_219 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_46 = [slice_49, full_4, full_5, full_32, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_44 = paddle._C_ops.stack(combine_46, 0) + del combine_46 + + # pd_op.reshape: (-1x49x3x24x32xf32) <- (-1x49x2304xf32, 5xi64) + reshape_98 = paddle._C_ops.reshape(add_47, stack_44) + del add_47, stack_44 + + # pd_op.transpose: (3x-1x24x49x32xf32) <- (-1x49x3x24x32xf32) + transpose_41 = paddle._C_ops.transpose(reshape_98, [2, 0, 3, 1, 4]) + del reshape_98 + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_50 = paddle._C_ops.slice( + transpose_41, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_51 = paddle._C_ops.slice( + transpose_41, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_52 = paddle._C_ops.slice( + transpose_41, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_41 + + # pd_op.scale: (-1x24x49x32xf32) <- (-1x24x49x32xf32, 1xf32) + scale_6 = paddle._C_ops.scale(slice_50, full_8, float("0"), True) + del slice_50 + + # pd_op.transpose: (-1x24x32x49xf32) <- (-1x24x49x32xf32) + transpose_42 = paddle._C_ops.transpose(slice_51, [0, 1, 3, 2]) + del slice_51 + + # pd_op.matmul: (-1x24x49x49xf32) <- (-1x24x49x32xf32, -1x24x32x49xf32) + matmul_39 = paddle._C_ops.matmul(scale_6, transpose_42, False, False) + del scale_6, transpose_42 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_99 = paddle._C_ops.reshape(data_13, full_int_array_7) + del data_13 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_6 = paddle._C_ops.index_select(data_14, reshape_99, 0) + del data_14, reshape_99 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_100 = paddle._C_ops.reshape(index_select_6, full_int_array_8) + del index_select_6 + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_43 = paddle._C_ops.transpose(reshape_100, [2, 0, 1]) + del reshape_100 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_18 = paddle._C_ops.unsqueeze(transpose_43, full_int_array_0) + del transpose_43 + + # pd_op.add: (-1x24x49x49xf32) <- (-1x24x49x49xf32, 1x24x49x49xf32) + add_48 = paddle._C_ops.add(matmul_39, unsqueeze_18) + del matmul_39, unsqueeze_18 + + # pd_op.softmax: (-1x24x49x49xf32) <- (-1x24x49x49xf32) + softmax_6 = paddle._C_ops.softmax(add_48, -1) + del add_48 + + # pd_op.matmul: (-1x24x49x32xf32) <- (-1x24x49x49xf32, -1x24x49x32xf32) + matmul_40 = paddle._C_ops.matmul(softmax_6, slice_52, False, False) + del slice_52, softmax_6 + + # pd_op.transpose: (-1x49x24x32xf32) <- (-1x24x49x32xf32) + transpose_44 = paddle._C_ops.transpose(matmul_40, [0, 2, 1, 3]) + del matmul_40 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_47 = [slice_49, full_4, full_18] + del slice_49 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_45 = paddle._C_ops.stack(combine_47, 0) + del combine_47 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x49x24x32xf32, 3xi64) + reshape_101 = paddle._C_ops.reshape(transpose_44, stack_45) + del stack_45, transpose_44 + + # pd_op.matmul: (-1x49x768xf32) <- (-1x49x768xf32, 768x768xf32) + matmul_41 = paddle._C_ops.matmul(reshape_101, parameter_218, False, False) + del parameter_218, reshape_101 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, 768xf32) + add_49 = paddle._C_ops.add(matmul_41, parameter_217) + del matmul_41, parameter_217 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x49x768xf32, 4xi64) + reshape_102 = paddle._C_ops.reshape(add_49, full_int_array_38) + del add_49 + + # pd_op.reshape: (-1x2x2x7x7x768xf32) <- (-1x7x7x768xf32, 6xi64) + reshape_103 = paddle._C_ops.reshape(reshape_102, full_int_array_40) + del reshape_102 + + # pd_op.transpose: (-1x2x7x2x7x768xf32) <- (-1x2x2x7x7x768xf32) + transpose_45 = paddle._C_ops.transpose(reshape_103, [0, 1, 3, 2, 4, 5]) + del reshape_103 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x2x7x2x7x768xf32, 4xi64) + reshape_104 = paddle._C_ops.reshape(transpose_45, full_int_array_41) + del transpose_45 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_48 = [slice_47, full_33, full_18] + del slice_47 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_46 = paddle._C_ops.stack(combine_48, 0) + del combine_48 + + # pd_op.reshape: (-1x196x768xf32) <- (-1x14x14x768xf32, 3xi64) + reshape_105 = paddle._C_ops.reshape(reshape_104, stack_46) + del reshape_104, stack_46 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_50 = paddle._C_ops.add(add_46, reshape_105) + del add_46, reshape_105 + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_48, layer_norm_49, layer_norm_50 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_50, parameter_216, parameter_215, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_215, parameter_216 + + # pd_op.matmul: (-1x196x3072xf32) <- (-1x196x768xf32, 768x3072xf32) + matmul_42 = paddle._C_ops.matmul(layer_norm_48, parameter_214, False, False) + del layer_norm_48, parameter_214 + + # pd_op.add: (-1x196x3072xf32) <- (-1x196x3072xf32, 3072xf32) + add_51 = paddle._C_ops.add(matmul_42, parameter_213) + del matmul_42, parameter_213 + + # pd_op.gelu: (-1x196x3072xf32) <- (-1x196x3072xf32) + gelu_6 = paddle._C_ops.gelu(add_51, False) + del add_51 + + # pd_op.matmul: (-1x196x768xf32) <- (-1x196x3072xf32, 3072x768xf32) + matmul_43 = paddle._C_ops.matmul(gelu_6, parameter_212, False, False) + del gelu_6, parameter_212 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, 768xf32) + add_52 = paddle._C_ops.add(matmul_43, parameter_211) + del matmul_43, parameter_211 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_53 = paddle._C_ops.add(add_50, add_52) + del add_50, add_52 + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_32 = paddle._C_ops.shape64(add_53) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_53 = paddle._C_ops.slice( + shape64_32, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_32 + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_51, layer_norm_52, layer_norm_53 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_53, parameter_210, parameter_209, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_209, parameter_210 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_49 = [slice_53, full_30, full_30, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_47 = paddle._C_ops.stack(combine_49, 0) + del combine_49 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x196x768xf32, 4xi64) + reshape_106 = paddle._C_ops.reshape(layer_norm_51, stack_47) + del layer_norm_51, stack_47 + + # pd_op.shape64: (4xi64) <- (-1x14x14x768xf32) + shape64_33 = paddle._C_ops.shape64(reshape_106) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_54 = paddle._C_ops.slice( + shape64_33, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_33 + + # pd_op.roll: (-1x14x14x768xf32) <- (-1x14x14x768xf32, 2xi64) + roll_6 = paddle._C_ops.roll(reshape_106, full_int_array_11, [1, 2]) + del reshape_106 + + # pd_op.shape64: (4xi64) <- (-1x14x14x768xf32) + shape64_34 = paddle._C_ops.shape64(roll_6) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_55 = paddle._C_ops.slice( + shape64_34, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_34 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_50 = [slice_55, full_31, full_3, full_31, full_3, full_18] + del slice_55 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_48 = paddle._C_ops.stack(combine_50, 0) + del combine_50 + + # pd_op.reshape: (-1x2x7x2x7x768xf32) <- (-1x14x14x768xf32, 6xi64) + reshape_107 = paddle._C_ops.reshape(roll_6, stack_48) + del roll_6, stack_48 + + # pd_op.transpose: (-1x2x2x7x7x768xf32) <- (-1x2x7x2x7x768xf32) + transpose_46 = paddle._C_ops.transpose(reshape_107, [0, 1, 3, 2, 4, 5]) + del reshape_107 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x2x2x7x7x768xf32, 4xi64) + reshape_108 = paddle._C_ops.reshape(transpose_46, full_int_array_38) + del transpose_46 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x7x7x768xf32, 3xi64) + reshape_109 = paddle._C_ops.reshape(reshape_108, full_int_array_39) + del reshape_108 + + # pd_op.full: (1x14x14x1xf32) <- () + full_38 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__36 = paddle._C_ops.set_value_( + full_38, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_38 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__37 = paddle._C_ops.set_value_( + set_value__36, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__36 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__38 = paddle._C_ops.set_value_( + set_value__37, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__37 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__39 = paddle._C_ops.set_value_( + set_value__38, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__38 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__40 = paddle._C_ops.set_value_( + set_value__39, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__39 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__41 = paddle._C_ops.set_value_( + set_value__40, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__40 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__42 = paddle._C_ops.set_value_( + set_value__41, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__41 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__43 = paddle._C_ops.set_value_( + set_value__42, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__42 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__3 = paddle._C_ops.set_value_( + set_value__43, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__43 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_110 = paddle._C_ops.reshape(set_value__3, full_int_array_42) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_47 = paddle._C_ops.transpose(reshape_110, [0, 1, 3, 2, 4, 5]) + del reshape_110 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_111 = paddle._C_ops.reshape(transpose_47, full_int_array_27) + del transpose_47 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_112 = paddle._C_ops.reshape(reshape_111, full_int_array_28) + del reshape_111 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_19 = paddle._C_ops.unsqueeze(reshape_112, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_20 = paddle._C_ops.unsqueeze(reshape_112, full_int_array_5) + del reshape_112 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_3 = paddle._C_ops.subtract(unsqueeze_19, unsqueeze_20) + del unsqueeze_19, unsqueeze_20 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_3 = paddle._C_ops.not_equal(subtract_3, full_11) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_6 = paddle._C_ops.where(not_equal_3, full_35, subtract_3) + del not_equal_3, subtract_3 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_3 = paddle._C_ops.equal(where_6, full_11) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_7 = paddle._C_ops.where(equal_3, full_36, where_6) + del equal_3, where_6 + + # pd_op.shape64: (3xi64) <- (-1x49x768xf32) + shape64_35 = paddle._C_ops.shape64(reshape_109) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_56 = paddle._C_ops.slice( + shape64_35, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_35 + + # pd_op.matmul: (-1x49x2304xf32) <- (-1x49x768xf32, 768x2304xf32) + matmul_44 = paddle._C_ops.matmul(reshape_109, parameter_208, False, False) + del parameter_208, reshape_109 + + # pd_op.add: (-1x49x2304xf32) <- (-1x49x2304xf32, 2304xf32) + add_54 = paddle._C_ops.add(matmul_44, parameter_207) + del matmul_44, parameter_207 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_51 = [slice_56, full_4, full_5, full_32, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_49 = paddle._C_ops.stack(combine_51, 0) + del combine_51 + + # pd_op.reshape: (-1x49x3x24x32xf32) <- (-1x49x2304xf32, 5xi64) + reshape_113 = paddle._C_ops.reshape(add_54, stack_49) + del add_54, stack_49 + + # pd_op.transpose: (3x-1x24x49x32xf32) <- (-1x49x3x24x32xf32) + transpose_48 = paddle._C_ops.transpose(reshape_113, [2, 0, 3, 1, 4]) + del reshape_113 + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_57 = paddle._C_ops.slice( + transpose_48, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_58 = paddle._C_ops.slice( + transpose_48, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_59 = paddle._C_ops.slice( + transpose_48, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_48 + + # pd_op.scale: (-1x24x49x32xf32) <- (-1x24x49x32xf32, 1xf32) + scale_7 = paddle._C_ops.scale(slice_57, full_8, float("0"), True) + del slice_57 + + # pd_op.transpose: (-1x24x32x49xf32) <- (-1x24x49x32xf32) + transpose_49 = paddle._C_ops.transpose(slice_58, [0, 1, 3, 2]) + del slice_58 + + # pd_op.matmul: (-1x24x49x49xf32) <- (-1x24x49x32xf32, -1x24x32x49xf32) + matmul_45 = paddle._C_ops.matmul(scale_7, transpose_49, False, False) + del scale_7, transpose_49 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_114 = paddle._C_ops.reshape(data_15, full_int_array_7) + del data_15 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_7 = paddle._C_ops.index_select(data_16, reshape_114, 0) + del data_16, reshape_114 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_115 = paddle._C_ops.reshape(index_select_7, full_int_array_8) + del index_select_7 + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_50 = paddle._C_ops.transpose(reshape_115, [2, 0, 1]) + del reshape_115 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_21 = paddle._C_ops.unsqueeze(transpose_50, full_int_array_0) + del transpose_50 + + # pd_op.add: (-1x24x49x49xf32) <- (-1x24x49x49xf32, 1x24x49x49xf32) + add_55 = paddle._C_ops.add(matmul_45, unsqueeze_21) + del matmul_45, unsqueeze_21 + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_3 = paddle._C_ops.floor_divide(slice_56, full_37) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_52 = [floor_divide_3, full_21, full_32, full_4, full_4] + del floor_divide_3 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_50 = paddle._C_ops.stack(combine_52, 0) + del combine_52 + + # pd_op.reshape: (-1x4x24x49x49xf32) <- (-1x24x49x49xf32, 5xi64) + reshape_116 = paddle._C_ops.reshape(add_55, stack_50) + del add_55, stack_50 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_22 = paddle._C_ops.unsqueeze(where_7, full_int_array_1) + del where_7 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_23 = paddle._C_ops.unsqueeze(unsqueeze_22, full_int_array_0) + del unsqueeze_22 + + # pd_op.add: (-1x4x24x49x49xf32) <- (-1x4x24x49x49xf32, 1x4x1x49x49xf32) + add_56 = paddle._C_ops.add(reshape_116, unsqueeze_23) + del reshape_116, unsqueeze_23 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_53 = [slice_56, full_32, full_4, full_4] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_51 = paddle._C_ops.stack(combine_53, 0) + del combine_53 + + # pd_op.reshape: (-1x24x49x49xf32) <- (-1x4x24x49x49xf32, 4xi64) + reshape_117 = paddle._C_ops.reshape(add_56, stack_51) + del add_56, stack_51 + + # pd_op.softmax: (-1x24x49x49xf32) <- (-1x24x49x49xf32) + softmax_7 = paddle._C_ops.softmax(reshape_117, -1) + del reshape_117 + + # pd_op.matmul: (-1x24x49x32xf32) <- (-1x24x49x49xf32, -1x24x49x32xf32) + matmul_46 = paddle._C_ops.matmul(softmax_7, slice_59, False, False) + del slice_59, softmax_7 + + # pd_op.transpose: (-1x49x24x32xf32) <- (-1x24x49x32xf32) + transpose_51 = paddle._C_ops.transpose(matmul_46, [0, 2, 1, 3]) + del matmul_46 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_54 = [slice_56, full_4, full_18] + del slice_56 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_52 = paddle._C_ops.stack(combine_54, 0) + del combine_54 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x49x24x32xf32, 3xi64) + reshape_118 = paddle._C_ops.reshape(transpose_51, stack_52) + del stack_52, transpose_51 + + # pd_op.matmul: (-1x49x768xf32) <- (-1x49x768xf32, 768x768xf32) + matmul_47 = paddle._C_ops.matmul(reshape_118, parameter_206, False, False) + del parameter_206, reshape_118 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, 768xf32) + add_57 = paddle._C_ops.add(matmul_47, parameter_205) + del matmul_47, parameter_205 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x49x768xf32, 4xi64) + reshape_119 = paddle._C_ops.reshape(add_57, full_int_array_38) + del add_57 + + # pd_op.reshape: (-1x2x2x7x7x768xf32) <- (-1x7x7x768xf32, 6xi64) + reshape_120 = paddle._C_ops.reshape(reshape_119, full_int_array_40) + del reshape_119 + + # pd_op.transpose: (-1x2x7x2x7x768xf32) <- (-1x2x2x7x7x768xf32) + transpose_52 = paddle._C_ops.transpose(reshape_120, [0, 1, 3, 2, 4, 5]) + del reshape_120 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x2x7x2x7x768xf32, 4xi64) + reshape_121 = paddle._C_ops.reshape(transpose_52, full_int_array_41) + del transpose_52 + + # pd_op.roll: (-1x14x14x768xf32) <- (-1x14x14x768xf32, 2xi64) + roll_7 = paddle._C_ops.roll(reshape_121, full_int_array_29, [1, 2]) + del reshape_121 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_55 = [slice_53, full_33, full_18] + del slice_53 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_53 = paddle._C_ops.stack(combine_55, 0) + del combine_55 + + # pd_op.reshape: (-1x196x768xf32) <- (-1x14x14x768xf32, 3xi64) + reshape_122 = paddle._C_ops.reshape(roll_7, stack_53) + del roll_7, stack_53 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_58 = paddle._C_ops.add(add_53, reshape_122) + del add_53, reshape_122 + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_54, layer_norm_55, layer_norm_56 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_58, parameter_204, parameter_203, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_203, parameter_204 + + # pd_op.matmul: (-1x196x3072xf32) <- (-1x196x768xf32, 768x3072xf32) + matmul_48 = paddle._C_ops.matmul(layer_norm_54, parameter_202, False, False) + del layer_norm_54, parameter_202 + + # pd_op.add: (-1x196x3072xf32) <- (-1x196x3072xf32, 3072xf32) + add_59 = paddle._C_ops.add(matmul_48, parameter_201) + del matmul_48, parameter_201 + + # pd_op.gelu: (-1x196x3072xf32) <- (-1x196x3072xf32) + gelu_7 = paddle._C_ops.gelu(add_59, False) + del add_59 + + # pd_op.matmul: (-1x196x768xf32) <- (-1x196x3072xf32, 3072x768xf32) + matmul_49 = paddle._C_ops.matmul(gelu_7, parameter_200, False, False) + del gelu_7, parameter_200 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, 768xf32) + add_60 = paddle._C_ops.add(matmul_49, parameter_199) + del matmul_49, parameter_199 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_61 = paddle._C_ops.add(add_58, add_60) + del add_58, add_60 + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_36 = paddle._C_ops.shape64(add_61) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_60 = paddle._C_ops.slice( + shape64_36, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_36 + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_57, layer_norm_58, layer_norm_59 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_61, parameter_198, parameter_197, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_197, parameter_198 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_56 = [slice_60, full_30, full_30, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_54 = paddle._C_ops.stack(combine_56, 0) + del combine_56 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x196x768xf32, 4xi64) + reshape_123 = paddle._C_ops.reshape(layer_norm_57, stack_54) + del layer_norm_57, stack_54 + + # pd_op.shape64: (4xi64) <- (-1x14x14x768xf32) + shape64_37 = paddle._C_ops.shape64(reshape_123) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_61 = paddle._C_ops.slice( + shape64_37, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_37 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_57 = [slice_61, full_31, full_3, full_31, full_3, full_18] + del slice_61 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_55 = paddle._C_ops.stack(combine_57, 0) + del combine_57 + + # pd_op.reshape: (-1x2x7x2x7x768xf32) <- (-1x14x14x768xf32, 6xi64) + reshape_124 = paddle._C_ops.reshape(reshape_123, stack_55) + del reshape_123, stack_55 + + # pd_op.transpose: (-1x2x2x7x7x768xf32) <- (-1x2x7x2x7x768xf32) + transpose_53 = paddle._C_ops.transpose(reshape_124, [0, 1, 3, 2, 4, 5]) + del reshape_124 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x2x2x7x7x768xf32, 4xi64) + reshape_125 = paddle._C_ops.reshape(transpose_53, full_int_array_38) + del transpose_53 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x7x7x768xf32, 3xi64) + reshape_126 = paddle._C_ops.reshape(reshape_125, full_int_array_39) + del reshape_125 + + # pd_op.shape64: (3xi64) <- (-1x49x768xf32) + shape64_38 = paddle._C_ops.shape64(reshape_126) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_62 = paddle._C_ops.slice( + shape64_38, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_38 + + # pd_op.matmul: (-1x49x2304xf32) <- (-1x49x768xf32, 768x2304xf32) + matmul_50 = paddle._C_ops.matmul(reshape_126, parameter_196, False, False) + del parameter_196, reshape_126 + + # pd_op.add: (-1x49x2304xf32) <- (-1x49x2304xf32, 2304xf32) + add_62 = paddle._C_ops.add(matmul_50, parameter_195) + del matmul_50, parameter_195 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_58 = [slice_62, full_4, full_5, full_32, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_56 = paddle._C_ops.stack(combine_58, 0) + del combine_58 + + # pd_op.reshape: (-1x49x3x24x32xf32) <- (-1x49x2304xf32, 5xi64) + reshape_127 = paddle._C_ops.reshape(add_62, stack_56) + del add_62, stack_56 + + # pd_op.transpose: (3x-1x24x49x32xf32) <- (-1x49x3x24x32xf32) + transpose_54 = paddle._C_ops.transpose(reshape_127, [2, 0, 3, 1, 4]) + del reshape_127 + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_63 = paddle._C_ops.slice( + transpose_54, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_64 = paddle._C_ops.slice( + transpose_54, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_65 = paddle._C_ops.slice( + transpose_54, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_54 + + # pd_op.scale: (-1x24x49x32xf32) <- (-1x24x49x32xf32, 1xf32) + scale_8 = paddle._C_ops.scale(slice_63, full_8, float("0"), True) + del slice_63 + + # pd_op.transpose: (-1x24x32x49xf32) <- (-1x24x49x32xf32) + transpose_55 = paddle._C_ops.transpose(slice_64, [0, 1, 3, 2]) + del slice_64 + + # pd_op.matmul: (-1x24x49x49xf32) <- (-1x24x49x32xf32, -1x24x32x49xf32) + matmul_51 = paddle._C_ops.matmul(scale_8, transpose_55, False, False) + del scale_8, transpose_55 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_128 = paddle._C_ops.reshape(data_17, full_int_array_7) + del data_17 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_8 = paddle._C_ops.index_select(data_18, reshape_128, 0) + del data_18, reshape_128 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_129 = paddle._C_ops.reshape(index_select_8, full_int_array_8) + del index_select_8 + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_56 = paddle._C_ops.transpose(reshape_129, [2, 0, 1]) + del reshape_129 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_24 = paddle._C_ops.unsqueeze(transpose_56, full_int_array_0) + del transpose_56 + + # pd_op.add: (-1x24x49x49xf32) <- (-1x24x49x49xf32, 1x24x49x49xf32) + add_63 = paddle._C_ops.add(matmul_51, unsqueeze_24) + del matmul_51, unsqueeze_24 + + # pd_op.softmax: (-1x24x49x49xf32) <- (-1x24x49x49xf32) + softmax_8 = paddle._C_ops.softmax(add_63, -1) + del add_63 + + # pd_op.matmul: (-1x24x49x32xf32) <- (-1x24x49x49xf32, -1x24x49x32xf32) + matmul_52 = paddle._C_ops.matmul(softmax_8, slice_65, False, False) + del slice_65, softmax_8 + + # pd_op.transpose: (-1x49x24x32xf32) <- (-1x24x49x32xf32) + transpose_57 = paddle._C_ops.transpose(matmul_52, [0, 2, 1, 3]) + del matmul_52 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_59 = [slice_62, full_4, full_18] + del slice_62 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_57 = paddle._C_ops.stack(combine_59, 0) + del combine_59 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x49x24x32xf32, 3xi64) + reshape_130 = paddle._C_ops.reshape(transpose_57, stack_57) + del stack_57, transpose_57 + + # pd_op.matmul: (-1x49x768xf32) <- (-1x49x768xf32, 768x768xf32) + matmul_53 = paddle._C_ops.matmul(reshape_130, parameter_194, False, False) + del parameter_194, reshape_130 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, 768xf32) + add_64 = paddle._C_ops.add(matmul_53, parameter_193) + del matmul_53, parameter_193 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x49x768xf32, 4xi64) + reshape_131 = paddle._C_ops.reshape(add_64, full_int_array_38) + del add_64 + + # pd_op.reshape: (-1x2x2x7x7x768xf32) <- (-1x7x7x768xf32, 6xi64) + reshape_132 = paddle._C_ops.reshape(reshape_131, full_int_array_40) + del reshape_131 + + # pd_op.transpose: (-1x2x7x2x7x768xf32) <- (-1x2x2x7x7x768xf32) + transpose_58 = paddle._C_ops.transpose(reshape_132, [0, 1, 3, 2, 4, 5]) + del reshape_132 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x2x7x2x7x768xf32, 4xi64) + reshape_133 = paddle._C_ops.reshape(transpose_58, full_int_array_41) + del transpose_58 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_60 = [slice_60, full_33, full_18] + del slice_60 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_58 = paddle._C_ops.stack(combine_60, 0) + del combine_60 + + # pd_op.reshape: (-1x196x768xf32) <- (-1x14x14x768xf32, 3xi64) + reshape_134 = paddle._C_ops.reshape(reshape_133, stack_58) + del reshape_133, stack_58 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_65 = paddle._C_ops.add(add_61, reshape_134) + del add_61, reshape_134 + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_60, layer_norm_61, layer_norm_62 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_65, parameter_192, parameter_191, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_191, parameter_192 + + # pd_op.matmul: (-1x196x3072xf32) <- (-1x196x768xf32, 768x3072xf32) + matmul_54 = paddle._C_ops.matmul(layer_norm_60, parameter_190, False, False) + del layer_norm_60, parameter_190 + + # pd_op.add: (-1x196x3072xf32) <- (-1x196x3072xf32, 3072xf32) + add_66 = paddle._C_ops.add(matmul_54, parameter_189) + del matmul_54, parameter_189 + + # pd_op.gelu: (-1x196x3072xf32) <- (-1x196x3072xf32) + gelu_8 = paddle._C_ops.gelu(add_66, False) + del add_66 + + # pd_op.matmul: (-1x196x768xf32) <- (-1x196x3072xf32, 3072x768xf32) + matmul_55 = paddle._C_ops.matmul(gelu_8, parameter_188, False, False) + del gelu_8, parameter_188 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, 768xf32) + add_67 = paddle._C_ops.add(matmul_55, parameter_187) + del matmul_55, parameter_187 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_68 = paddle._C_ops.add(add_65, add_67) + del add_65, add_67 + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_39 = paddle._C_ops.shape64(add_68) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_66 = paddle._C_ops.slice( + shape64_39, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_39 + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_63, layer_norm_64, layer_norm_65 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_68, parameter_186, parameter_185, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_185, parameter_186 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_61 = [slice_66, full_30, full_30, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_59 = paddle._C_ops.stack(combine_61, 0) + del combine_61 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x196x768xf32, 4xi64) + reshape_135 = paddle._C_ops.reshape(layer_norm_63, stack_59) + del layer_norm_63, stack_59 + + # pd_op.shape64: (4xi64) <- (-1x14x14x768xf32) + shape64_40 = paddle._C_ops.shape64(reshape_135) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_67 = paddle._C_ops.slice( + shape64_40, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_40 + + # pd_op.roll: (-1x14x14x768xf32) <- (-1x14x14x768xf32, 2xi64) + roll_8 = paddle._C_ops.roll(reshape_135, full_int_array_11, [1, 2]) + del reshape_135 + + # pd_op.shape64: (4xi64) <- (-1x14x14x768xf32) + shape64_41 = paddle._C_ops.shape64(roll_8) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_68 = paddle._C_ops.slice( + shape64_41, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_41 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_62 = [slice_68, full_31, full_3, full_31, full_3, full_18] + del slice_68 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_60 = paddle._C_ops.stack(combine_62, 0) + del combine_62 + + # pd_op.reshape: (-1x2x7x2x7x768xf32) <- (-1x14x14x768xf32, 6xi64) + reshape_136 = paddle._C_ops.reshape(roll_8, stack_60) + del roll_8, stack_60 + + # pd_op.transpose: (-1x2x2x7x7x768xf32) <- (-1x2x7x2x7x768xf32) + transpose_59 = paddle._C_ops.transpose(reshape_136, [0, 1, 3, 2, 4, 5]) + del reshape_136 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x2x2x7x7x768xf32, 4xi64) + reshape_137 = paddle._C_ops.reshape(transpose_59, full_int_array_38) + del transpose_59 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x7x7x768xf32, 3xi64) + reshape_138 = paddle._C_ops.reshape(reshape_137, full_int_array_39) + del reshape_137 + + # pd_op.full: (1x14x14x1xf32) <- () + full_39 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__44 = paddle._C_ops.set_value_( + full_39, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_39 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__45 = paddle._C_ops.set_value_( + set_value__44, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__44 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__46 = paddle._C_ops.set_value_( + set_value__45, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__45 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__47 = paddle._C_ops.set_value_( + set_value__46, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__46 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__48 = paddle._C_ops.set_value_( + set_value__47, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__47 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__49 = paddle._C_ops.set_value_( + set_value__48, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__48 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__50 = paddle._C_ops.set_value_( + set_value__49, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__49 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__51 = paddle._C_ops.set_value_( + set_value__50, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__50 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__4 = paddle._C_ops.set_value_( + set_value__51, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__51 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_139 = paddle._C_ops.reshape(set_value__4, full_int_array_42) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_60 = paddle._C_ops.transpose(reshape_139, [0, 1, 3, 2, 4, 5]) + del reshape_139 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_140 = paddle._C_ops.reshape(transpose_60, full_int_array_27) + del transpose_60 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_141 = paddle._C_ops.reshape(reshape_140, full_int_array_28) + del reshape_140 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_25 = paddle._C_ops.unsqueeze(reshape_141, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_26 = paddle._C_ops.unsqueeze(reshape_141, full_int_array_5) + del reshape_141 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_4 = paddle._C_ops.subtract(unsqueeze_25, unsqueeze_26) + del unsqueeze_25, unsqueeze_26 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_4 = paddle._C_ops.not_equal(subtract_4, full_11) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_8 = paddle._C_ops.where(not_equal_4, full_35, subtract_4) + del not_equal_4, subtract_4 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_4 = paddle._C_ops.equal(where_8, full_11) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_9 = paddle._C_ops.where(equal_4, full_36, where_8) + del equal_4, where_8 + + # pd_op.shape64: (3xi64) <- (-1x49x768xf32) + shape64_42 = paddle._C_ops.shape64(reshape_138) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_69 = paddle._C_ops.slice( + shape64_42, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_42 + + # pd_op.matmul: (-1x49x2304xf32) <- (-1x49x768xf32, 768x2304xf32) + matmul_56 = paddle._C_ops.matmul(reshape_138, parameter_184, False, False) + del parameter_184, reshape_138 + + # pd_op.add: (-1x49x2304xf32) <- (-1x49x2304xf32, 2304xf32) + add_69 = paddle._C_ops.add(matmul_56, parameter_183) + del matmul_56, parameter_183 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_63 = [slice_69, full_4, full_5, full_32, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_61 = paddle._C_ops.stack(combine_63, 0) + del combine_63 + + # pd_op.reshape: (-1x49x3x24x32xf32) <- (-1x49x2304xf32, 5xi64) + reshape_142 = paddle._C_ops.reshape(add_69, stack_61) + del add_69, stack_61 + + # pd_op.transpose: (3x-1x24x49x32xf32) <- (-1x49x3x24x32xf32) + transpose_61 = paddle._C_ops.transpose(reshape_142, [2, 0, 3, 1, 4]) + del reshape_142 + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_70 = paddle._C_ops.slice( + transpose_61, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_71 = paddle._C_ops.slice( + transpose_61, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_72 = paddle._C_ops.slice( + transpose_61, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_61 + + # pd_op.scale: (-1x24x49x32xf32) <- (-1x24x49x32xf32, 1xf32) + scale_9 = paddle._C_ops.scale(slice_70, full_8, float("0"), True) + del slice_70 + + # pd_op.transpose: (-1x24x32x49xf32) <- (-1x24x49x32xf32) + transpose_62 = paddle._C_ops.transpose(slice_71, [0, 1, 3, 2]) + del slice_71 + + # pd_op.matmul: (-1x24x49x49xf32) <- (-1x24x49x32xf32, -1x24x32x49xf32) + matmul_57 = paddle._C_ops.matmul(scale_9, transpose_62, False, False) + del scale_9, transpose_62 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_143 = paddle._C_ops.reshape(data_19, full_int_array_7) + del data_19 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_9 = paddle._C_ops.index_select(data_20, reshape_143, 0) + del data_20, reshape_143 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_144 = paddle._C_ops.reshape(index_select_9, full_int_array_8) + del index_select_9 + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_63 = paddle._C_ops.transpose(reshape_144, [2, 0, 1]) + del reshape_144 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_27 = paddle._C_ops.unsqueeze(transpose_63, full_int_array_0) + del transpose_63 + + # pd_op.add: (-1x24x49x49xf32) <- (-1x24x49x49xf32, 1x24x49x49xf32) + add_70 = paddle._C_ops.add(matmul_57, unsqueeze_27) + del matmul_57, unsqueeze_27 + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_4 = paddle._C_ops.floor_divide(slice_69, full_37) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_64 = [floor_divide_4, full_21, full_32, full_4, full_4] + del floor_divide_4 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_62 = paddle._C_ops.stack(combine_64, 0) + del combine_64 + + # pd_op.reshape: (-1x4x24x49x49xf32) <- (-1x24x49x49xf32, 5xi64) + reshape_145 = paddle._C_ops.reshape(add_70, stack_62) + del add_70, stack_62 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_28 = paddle._C_ops.unsqueeze(where_9, full_int_array_1) + del where_9 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_29 = paddle._C_ops.unsqueeze(unsqueeze_28, full_int_array_0) + del unsqueeze_28 + + # pd_op.add: (-1x4x24x49x49xf32) <- (-1x4x24x49x49xf32, 1x4x1x49x49xf32) + add_71 = paddle._C_ops.add(reshape_145, unsqueeze_29) + del reshape_145, unsqueeze_29 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_65 = [slice_69, full_32, full_4, full_4] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_63 = paddle._C_ops.stack(combine_65, 0) + del combine_65 + + # pd_op.reshape: (-1x24x49x49xf32) <- (-1x4x24x49x49xf32, 4xi64) + reshape_146 = paddle._C_ops.reshape(add_71, stack_63) + del add_71, stack_63 + + # pd_op.softmax: (-1x24x49x49xf32) <- (-1x24x49x49xf32) + softmax_9 = paddle._C_ops.softmax(reshape_146, -1) + del reshape_146 + + # pd_op.matmul: (-1x24x49x32xf32) <- (-1x24x49x49xf32, -1x24x49x32xf32) + matmul_58 = paddle._C_ops.matmul(softmax_9, slice_72, False, False) + del slice_72, softmax_9 + + # pd_op.transpose: (-1x49x24x32xf32) <- (-1x24x49x32xf32) + transpose_64 = paddle._C_ops.transpose(matmul_58, [0, 2, 1, 3]) + del matmul_58 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_66 = [slice_69, full_4, full_18] + del slice_69 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_64 = paddle._C_ops.stack(combine_66, 0) + del combine_66 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x49x24x32xf32, 3xi64) + reshape_147 = paddle._C_ops.reshape(transpose_64, stack_64) + del stack_64, transpose_64 + + # pd_op.matmul: (-1x49x768xf32) <- (-1x49x768xf32, 768x768xf32) + matmul_59 = paddle._C_ops.matmul(reshape_147, parameter_182, False, False) + del parameter_182, reshape_147 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, 768xf32) + add_72 = paddle._C_ops.add(matmul_59, parameter_181) + del matmul_59, parameter_181 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x49x768xf32, 4xi64) + reshape_148 = paddle._C_ops.reshape(add_72, full_int_array_38) + del add_72 + + # pd_op.reshape: (-1x2x2x7x7x768xf32) <- (-1x7x7x768xf32, 6xi64) + reshape_149 = paddle._C_ops.reshape(reshape_148, full_int_array_40) + del reshape_148 + + # pd_op.transpose: (-1x2x7x2x7x768xf32) <- (-1x2x2x7x7x768xf32) + transpose_65 = paddle._C_ops.transpose(reshape_149, [0, 1, 3, 2, 4, 5]) + del reshape_149 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x2x7x2x7x768xf32, 4xi64) + reshape_150 = paddle._C_ops.reshape(transpose_65, full_int_array_41) + del transpose_65 + + # pd_op.roll: (-1x14x14x768xf32) <- (-1x14x14x768xf32, 2xi64) + roll_9 = paddle._C_ops.roll(reshape_150, full_int_array_29, [1, 2]) + del reshape_150 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_67 = [slice_66, full_33, full_18] + del slice_66 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_65 = paddle._C_ops.stack(combine_67, 0) + del combine_67 + + # pd_op.reshape: (-1x196x768xf32) <- (-1x14x14x768xf32, 3xi64) + reshape_151 = paddle._C_ops.reshape(roll_9, stack_65) + del roll_9, stack_65 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_73 = paddle._C_ops.add(add_68, reshape_151) + del add_68, reshape_151 + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_66, layer_norm_67, layer_norm_68 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_73, parameter_180, parameter_179, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_179, parameter_180 + + # pd_op.matmul: (-1x196x3072xf32) <- (-1x196x768xf32, 768x3072xf32) + matmul_60 = paddle._C_ops.matmul(layer_norm_66, parameter_178, False, False) + del layer_norm_66, parameter_178 + + # pd_op.add: (-1x196x3072xf32) <- (-1x196x3072xf32, 3072xf32) + add_74 = paddle._C_ops.add(matmul_60, parameter_177) + del matmul_60, parameter_177 + + # pd_op.gelu: (-1x196x3072xf32) <- (-1x196x3072xf32) + gelu_9 = paddle._C_ops.gelu(add_74, False) + del add_74 + + # pd_op.matmul: (-1x196x768xf32) <- (-1x196x3072xf32, 3072x768xf32) + matmul_61 = paddle._C_ops.matmul(gelu_9, parameter_176, False, False) + del gelu_9, parameter_176 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, 768xf32) + add_75 = paddle._C_ops.add(matmul_61, parameter_175) + del matmul_61, parameter_175 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_76 = paddle._C_ops.add(add_73, add_75) + del add_73, add_75 + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_43 = paddle._C_ops.shape64(add_76) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_73 = paddle._C_ops.slice( + shape64_43, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_43 + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_69, layer_norm_70, layer_norm_71 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_76, parameter_174, parameter_173, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_173, parameter_174 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_68 = [slice_73, full_30, full_30, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_66 = paddle._C_ops.stack(combine_68, 0) + del combine_68 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x196x768xf32, 4xi64) + reshape_152 = paddle._C_ops.reshape(layer_norm_69, stack_66) + del layer_norm_69, stack_66 + + # pd_op.shape64: (4xi64) <- (-1x14x14x768xf32) + shape64_44 = paddle._C_ops.shape64(reshape_152) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_74 = paddle._C_ops.slice( + shape64_44, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_44 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_69 = [slice_74, full_31, full_3, full_31, full_3, full_18] + del slice_74 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_67 = paddle._C_ops.stack(combine_69, 0) + del combine_69 + + # pd_op.reshape: (-1x2x7x2x7x768xf32) <- (-1x14x14x768xf32, 6xi64) + reshape_153 = paddle._C_ops.reshape(reshape_152, stack_67) + del reshape_152, stack_67 + + # pd_op.transpose: (-1x2x2x7x7x768xf32) <- (-1x2x7x2x7x768xf32) + transpose_66 = paddle._C_ops.transpose(reshape_153, [0, 1, 3, 2, 4, 5]) + del reshape_153 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x2x2x7x7x768xf32, 4xi64) + reshape_154 = paddle._C_ops.reshape(transpose_66, full_int_array_38) + del transpose_66 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x7x7x768xf32, 3xi64) + reshape_155 = paddle._C_ops.reshape(reshape_154, full_int_array_39) + del reshape_154 + + # pd_op.shape64: (3xi64) <- (-1x49x768xf32) + shape64_45 = paddle._C_ops.shape64(reshape_155) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_75 = paddle._C_ops.slice( + shape64_45, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_45 + + # pd_op.matmul: (-1x49x2304xf32) <- (-1x49x768xf32, 768x2304xf32) + matmul_62 = paddle._C_ops.matmul(reshape_155, parameter_172, False, False) + del parameter_172, reshape_155 + + # pd_op.add: (-1x49x2304xf32) <- (-1x49x2304xf32, 2304xf32) + add_77 = paddle._C_ops.add(matmul_62, parameter_171) + del matmul_62, parameter_171 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_70 = [slice_75, full_4, full_5, full_32, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_68 = paddle._C_ops.stack(combine_70, 0) + del combine_70 + + # pd_op.reshape: (-1x49x3x24x32xf32) <- (-1x49x2304xf32, 5xi64) + reshape_156 = paddle._C_ops.reshape(add_77, stack_68) + del add_77, stack_68 + + # pd_op.transpose: (3x-1x24x49x32xf32) <- (-1x49x3x24x32xf32) + transpose_67 = paddle._C_ops.transpose(reshape_156, [2, 0, 3, 1, 4]) + del reshape_156 + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_76 = paddle._C_ops.slice( + transpose_67, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_77 = paddle._C_ops.slice( + transpose_67, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_78 = paddle._C_ops.slice( + transpose_67, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_67 + + # pd_op.scale: (-1x24x49x32xf32) <- (-1x24x49x32xf32, 1xf32) + scale_10 = paddle._C_ops.scale(slice_76, full_8, float("0"), True) + del slice_76 + + # pd_op.transpose: (-1x24x32x49xf32) <- (-1x24x49x32xf32) + transpose_68 = paddle._C_ops.transpose(slice_77, [0, 1, 3, 2]) + del slice_77 + + # pd_op.matmul: (-1x24x49x49xf32) <- (-1x24x49x32xf32, -1x24x32x49xf32) + matmul_63 = paddle._C_ops.matmul(scale_10, transpose_68, False, False) + del scale_10, transpose_68 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_157 = paddle._C_ops.reshape(data_21, full_int_array_7) + del data_21 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_10 = paddle._C_ops.index_select(data_22, reshape_157, 0) + del data_22, reshape_157 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_158 = paddle._C_ops.reshape(index_select_10, full_int_array_8) + del index_select_10 + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_69 = paddle._C_ops.transpose(reshape_158, [2, 0, 1]) + del reshape_158 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_30 = paddle._C_ops.unsqueeze(transpose_69, full_int_array_0) + del transpose_69 + + # pd_op.add: (-1x24x49x49xf32) <- (-1x24x49x49xf32, 1x24x49x49xf32) + add_78 = paddle._C_ops.add(matmul_63, unsqueeze_30) + del matmul_63, unsqueeze_30 + + # pd_op.softmax: (-1x24x49x49xf32) <- (-1x24x49x49xf32) + softmax_10 = paddle._C_ops.softmax(add_78, -1) + del add_78 + + # pd_op.matmul: (-1x24x49x32xf32) <- (-1x24x49x49xf32, -1x24x49x32xf32) + matmul_64 = paddle._C_ops.matmul(softmax_10, slice_78, False, False) + del slice_78, softmax_10 + + # pd_op.transpose: (-1x49x24x32xf32) <- (-1x24x49x32xf32) + transpose_70 = paddle._C_ops.transpose(matmul_64, [0, 2, 1, 3]) + del matmul_64 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_71 = [slice_75, full_4, full_18] + del slice_75 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_69 = paddle._C_ops.stack(combine_71, 0) + del combine_71 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x49x24x32xf32, 3xi64) + reshape_159 = paddle._C_ops.reshape(transpose_70, stack_69) + del stack_69, transpose_70 + + # pd_op.matmul: (-1x49x768xf32) <- (-1x49x768xf32, 768x768xf32) + matmul_65 = paddle._C_ops.matmul(reshape_159, parameter_170, False, False) + del parameter_170, reshape_159 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, 768xf32) + add_79 = paddle._C_ops.add(matmul_65, parameter_169) + del matmul_65, parameter_169 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x49x768xf32, 4xi64) + reshape_160 = paddle._C_ops.reshape(add_79, full_int_array_38) + del add_79 + + # pd_op.reshape: (-1x2x2x7x7x768xf32) <- (-1x7x7x768xf32, 6xi64) + reshape_161 = paddle._C_ops.reshape(reshape_160, full_int_array_40) + del reshape_160 + + # pd_op.transpose: (-1x2x7x2x7x768xf32) <- (-1x2x2x7x7x768xf32) + transpose_71 = paddle._C_ops.transpose(reshape_161, [0, 1, 3, 2, 4, 5]) + del reshape_161 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x2x7x2x7x768xf32, 4xi64) + reshape_162 = paddle._C_ops.reshape(transpose_71, full_int_array_41) + del transpose_71 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_72 = [slice_73, full_33, full_18] + del slice_73 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_70 = paddle._C_ops.stack(combine_72, 0) + del combine_72 + + # pd_op.reshape: (-1x196x768xf32) <- (-1x14x14x768xf32, 3xi64) + reshape_163 = paddle._C_ops.reshape(reshape_162, stack_70) + del reshape_162, stack_70 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_80 = paddle._C_ops.add(add_76, reshape_163) + del add_76, reshape_163 + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_72, layer_norm_73, layer_norm_74 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_80, parameter_168, parameter_167, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_167, parameter_168 + + # pd_op.matmul: (-1x196x3072xf32) <- (-1x196x768xf32, 768x3072xf32) + matmul_66 = paddle._C_ops.matmul(layer_norm_72, parameter_166, False, False) + del layer_norm_72, parameter_166 + + # pd_op.add: (-1x196x3072xf32) <- (-1x196x3072xf32, 3072xf32) + add_81 = paddle._C_ops.add(matmul_66, parameter_165) + del matmul_66, parameter_165 + + # pd_op.gelu: (-1x196x3072xf32) <- (-1x196x3072xf32) + gelu_10 = paddle._C_ops.gelu(add_81, False) + del add_81 + + # pd_op.matmul: (-1x196x768xf32) <- (-1x196x3072xf32, 3072x768xf32) + matmul_67 = paddle._C_ops.matmul(gelu_10, parameter_164, False, False) + del gelu_10, parameter_164 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, 768xf32) + add_82 = paddle._C_ops.add(matmul_67, parameter_163) + del matmul_67, parameter_163 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_83 = paddle._C_ops.add(add_80, add_82) + del add_80, add_82 + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_46 = paddle._C_ops.shape64(add_83) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_79 = paddle._C_ops.slice( + shape64_46, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_46 + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_75, layer_norm_76, layer_norm_77 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_83, parameter_162, parameter_161, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_161, parameter_162 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_73 = [slice_79, full_30, full_30, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_71 = paddle._C_ops.stack(combine_73, 0) + del combine_73 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x196x768xf32, 4xi64) + reshape_164 = paddle._C_ops.reshape(layer_norm_75, stack_71) + del layer_norm_75, stack_71 + + # pd_op.shape64: (4xi64) <- (-1x14x14x768xf32) + shape64_47 = paddle._C_ops.shape64(reshape_164) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_80 = paddle._C_ops.slice( + shape64_47, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_47 + + # pd_op.roll: (-1x14x14x768xf32) <- (-1x14x14x768xf32, 2xi64) + roll_10 = paddle._C_ops.roll(reshape_164, full_int_array_11, [1, 2]) + del reshape_164 + + # pd_op.shape64: (4xi64) <- (-1x14x14x768xf32) + shape64_48 = paddle._C_ops.shape64(roll_10) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_81 = paddle._C_ops.slice( + shape64_48, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_48 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_74 = [slice_81, full_31, full_3, full_31, full_3, full_18] + del slice_81 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_72 = paddle._C_ops.stack(combine_74, 0) + del combine_74 + + # pd_op.reshape: (-1x2x7x2x7x768xf32) <- (-1x14x14x768xf32, 6xi64) + reshape_165 = paddle._C_ops.reshape(roll_10, stack_72) + del roll_10, stack_72 + + # pd_op.transpose: (-1x2x2x7x7x768xf32) <- (-1x2x7x2x7x768xf32) + transpose_72 = paddle._C_ops.transpose(reshape_165, [0, 1, 3, 2, 4, 5]) + del reshape_165 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x2x2x7x7x768xf32, 4xi64) + reshape_166 = paddle._C_ops.reshape(transpose_72, full_int_array_38) + del transpose_72 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x7x7x768xf32, 3xi64) + reshape_167 = paddle._C_ops.reshape(reshape_166, full_int_array_39) + del reshape_166 + + # pd_op.full: (1x14x14x1xf32) <- () + full_40 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__52 = paddle._C_ops.set_value_( + full_40, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_40 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__53 = paddle._C_ops.set_value_( + set_value__52, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__52 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__54 = paddle._C_ops.set_value_( + set_value__53, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__53 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__55 = paddle._C_ops.set_value_( + set_value__54, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__54 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__56 = paddle._C_ops.set_value_( + set_value__55, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__55 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__57 = paddle._C_ops.set_value_( + set_value__56, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__56 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__58 = paddle._C_ops.set_value_( + set_value__57, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__57 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__59 = paddle._C_ops.set_value_( + set_value__58, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__58 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__5 = paddle._C_ops.set_value_( + set_value__59, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__59 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_168 = paddle._C_ops.reshape(set_value__5, full_int_array_42) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_73 = paddle._C_ops.transpose(reshape_168, [0, 1, 3, 2, 4, 5]) + del reshape_168 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_169 = paddle._C_ops.reshape(transpose_73, full_int_array_27) + del transpose_73 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_170 = paddle._C_ops.reshape(reshape_169, full_int_array_28) + del reshape_169 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_31 = paddle._C_ops.unsqueeze(reshape_170, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_32 = paddle._C_ops.unsqueeze(reshape_170, full_int_array_5) + del reshape_170 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_5 = paddle._C_ops.subtract(unsqueeze_31, unsqueeze_32) + del unsqueeze_31, unsqueeze_32 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_5 = paddle._C_ops.not_equal(subtract_5, full_11) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_10 = paddle._C_ops.where(not_equal_5, full_35, subtract_5) + del not_equal_5, subtract_5 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_5 = paddle._C_ops.equal(where_10, full_11) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_11 = paddle._C_ops.where(equal_5, full_36, where_10) + del equal_5, where_10 + + # pd_op.shape64: (3xi64) <- (-1x49x768xf32) + shape64_49 = paddle._C_ops.shape64(reshape_167) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_82 = paddle._C_ops.slice( + shape64_49, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_49 + + # pd_op.matmul: (-1x49x2304xf32) <- (-1x49x768xf32, 768x2304xf32) + matmul_68 = paddle._C_ops.matmul(reshape_167, parameter_160, False, False) + del parameter_160, reshape_167 + + # pd_op.add: (-1x49x2304xf32) <- (-1x49x2304xf32, 2304xf32) + add_84 = paddle._C_ops.add(matmul_68, parameter_159) + del matmul_68, parameter_159 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_75 = [slice_82, full_4, full_5, full_32, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_73 = paddle._C_ops.stack(combine_75, 0) + del combine_75 + + # pd_op.reshape: (-1x49x3x24x32xf32) <- (-1x49x2304xf32, 5xi64) + reshape_171 = paddle._C_ops.reshape(add_84, stack_73) + del add_84, stack_73 + + # pd_op.transpose: (3x-1x24x49x32xf32) <- (-1x49x3x24x32xf32) + transpose_74 = paddle._C_ops.transpose(reshape_171, [2, 0, 3, 1, 4]) + del reshape_171 + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_83 = paddle._C_ops.slice( + transpose_74, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_84 = paddle._C_ops.slice( + transpose_74, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_85 = paddle._C_ops.slice( + transpose_74, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_74 + + # pd_op.scale: (-1x24x49x32xf32) <- (-1x24x49x32xf32, 1xf32) + scale_11 = paddle._C_ops.scale(slice_83, full_8, float("0"), True) + del slice_83 + + # pd_op.transpose: (-1x24x32x49xf32) <- (-1x24x49x32xf32) + transpose_75 = paddle._C_ops.transpose(slice_84, [0, 1, 3, 2]) + del slice_84 + + # pd_op.matmul: (-1x24x49x49xf32) <- (-1x24x49x32xf32, -1x24x32x49xf32) + matmul_69 = paddle._C_ops.matmul(scale_11, transpose_75, False, False) + del scale_11, transpose_75 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_172 = paddle._C_ops.reshape(data_23, full_int_array_7) + del data_23 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_11 = paddle._C_ops.index_select(data_24, reshape_172, 0) + del data_24, reshape_172 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_173 = paddle._C_ops.reshape(index_select_11, full_int_array_8) + del index_select_11 + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_76 = paddle._C_ops.transpose(reshape_173, [2, 0, 1]) + del reshape_173 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_33 = paddle._C_ops.unsqueeze(transpose_76, full_int_array_0) + del transpose_76 + + # pd_op.add: (-1x24x49x49xf32) <- (-1x24x49x49xf32, 1x24x49x49xf32) + add_85 = paddle._C_ops.add(matmul_69, unsqueeze_33) + del matmul_69, unsqueeze_33 + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_5 = paddle._C_ops.floor_divide(slice_82, full_37) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_76 = [floor_divide_5, full_21, full_32, full_4, full_4] + del floor_divide_5 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_74 = paddle._C_ops.stack(combine_76, 0) + del combine_76 + + # pd_op.reshape: (-1x4x24x49x49xf32) <- (-1x24x49x49xf32, 5xi64) + reshape_174 = paddle._C_ops.reshape(add_85, stack_74) + del add_85, stack_74 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_34 = paddle._C_ops.unsqueeze(where_11, full_int_array_1) + del where_11 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_35 = paddle._C_ops.unsqueeze(unsqueeze_34, full_int_array_0) + del unsqueeze_34 + + # pd_op.add: (-1x4x24x49x49xf32) <- (-1x4x24x49x49xf32, 1x4x1x49x49xf32) + add_86 = paddle._C_ops.add(reshape_174, unsqueeze_35) + del reshape_174, unsqueeze_35 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_77 = [slice_82, full_32, full_4, full_4] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_75 = paddle._C_ops.stack(combine_77, 0) + del combine_77 + + # pd_op.reshape: (-1x24x49x49xf32) <- (-1x4x24x49x49xf32, 4xi64) + reshape_175 = paddle._C_ops.reshape(add_86, stack_75) + del add_86, stack_75 + + # pd_op.softmax: (-1x24x49x49xf32) <- (-1x24x49x49xf32) + softmax_11 = paddle._C_ops.softmax(reshape_175, -1) + del reshape_175 + + # pd_op.matmul: (-1x24x49x32xf32) <- (-1x24x49x49xf32, -1x24x49x32xf32) + matmul_70 = paddle._C_ops.matmul(softmax_11, slice_85, False, False) + del slice_85, softmax_11 + + # pd_op.transpose: (-1x49x24x32xf32) <- (-1x24x49x32xf32) + transpose_77 = paddle._C_ops.transpose(matmul_70, [0, 2, 1, 3]) + del matmul_70 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_78 = [slice_82, full_4, full_18] + del slice_82 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_76 = paddle._C_ops.stack(combine_78, 0) + del combine_78 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x49x24x32xf32, 3xi64) + reshape_176 = paddle._C_ops.reshape(transpose_77, stack_76) + del stack_76, transpose_77 + + # pd_op.matmul: (-1x49x768xf32) <- (-1x49x768xf32, 768x768xf32) + matmul_71 = paddle._C_ops.matmul(reshape_176, parameter_158, False, False) + del parameter_158, reshape_176 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, 768xf32) + add_87 = paddle._C_ops.add(matmul_71, parameter_157) + del matmul_71, parameter_157 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x49x768xf32, 4xi64) + reshape_177 = paddle._C_ops.reshape(add_87, full_int_array_38) + del add_87 + + # pd_op.reshape: (-1x2x2x7x7x768xf32) <- (-1x7x7x768xf32, 6xi64) + reshape_178 = paddle._C_ops.reshape(reshape_177, full_int_array_40) + del reshape_177 + + # pd_op.transpose: (-1x2x7x2x7x768xf32) <- (-1x2x2x7x7x768xf32) + transpose_78 = paddle._C_ops.transpose(reshape_178, [0, 1, 3, 2, 4, 5]) + del reshape_178 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x2x7x2x7x768xf32, 4xi64) + reshape_179 = paddle._C_ops.reshape(transpose_78, full_int_array_41) + del transpose_78 + + # pd_op.roll: (-1x14x14x768xf32) <- (-1x14x14x768xf32, 2xi64) + roll_11 = paddle._C_ops.roll(reshape_179, full_int_array_29, [1, 2]) + del reshape_179 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_79 = [slice_79, full_33, full_18] + del slice_79 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_77 = paddle._C_ops.stack(combine_79, 0) + del combine_79 + + # pd_op.reshape: (-1x196x768xf32) <- (-1x14x14x768xf32, 3xi64) + reshape_180 = paddle._C_ops.reshape(roll_11, stack_77) + del roll_11, stack_77 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_88 = paddle._C_ops.add(add_83, reshape_180) + del add_83, reshape_180 + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_78, layer_norm_79, layer_norm_80 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_88, parameter_156, parameter_155, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_155, parameter_156 + + # pd_op.matmul: (-1x196x3072xf32) <- (-1x196x768xf32, 768x3072xf32) + matmul_72 = paddle._C_ops.matmul(layer_norm_78, parameter_154, False, False) + del layer_norm_78, parameter_154 + + # pd_op.add: (-1x196x3072xf32) <- (-1x196x3072xf32, 3072xf32) + add_89 = paddle._C_ops.add(matmul_72, parameter_153) + del matmul_72, parameter_153 + + # pd_op.gelu: (-1x196x3072xf32) <- (-1x196x3072xf32) + gelu_11 = paddle._C_ops.gelu(add_89, False) + del add_89 + + # pd_op.matmul: (-1x196x768xf32) <- (-1x196x3072xf32, 3072x768xf32) + matmul_73 = paddle._C_ops.matmul(gelu_11, parameter_152, False, False) + del gelu_11, parameter_152 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, 768xf32) + add_90 = paddle._C_ops.add(matmul_73, parameter_151) + del matmul_73, parameter_151 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_91 = paddle._C_ops.add(add_88, add_90) + del add_88, add_90 + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_50 = paddle._C_ops.shape64(add_91) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_86 = paddle._C_ops.slice( + shape64_50, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_50 + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_81, layer_norm_82, layer_norm_83 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_91, parameter_150, parameter_149, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_149, parameter_150 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_80 = [slice_86, full_30, full_30, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_78 = paddle._C_ops.stack(combine_80, 0) + del combine_80 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x196x768xf32, 4xi64) + reshape_181 = paddle._C_ops.reshape(layer_norm_81, stack_78) + del layer_norm_81, stack_78 + + # pd_op.shape64: (4xi64) <- (-1x14x14x768xf32) + shape64_51 = paddle._C_ops.shape64(reshape_181) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_87 = paddle._C_ops.slice( + shape64_51, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_51 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_81 = [slice_87, full_31, full_3, full_31, full_3, full_18] + del slice_87 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_79 = paddle._C_ops.stack(combine_81, 0) + del combine_81 + + # pd_op.reshape: (-1x2x7x2x7x768xf32) <- (-1x14x14x768xf32, 6xi64) + reshape_182 = paddle._C_ops.reshape(reshape_181, stack_79) + del reshape_181, stack_79 + + # pd_op.transpose: (-1x2x2x7x7x768xf32) <- (-1x2x7x2x7x768xf32) + transpose_79 = paddle._C_ops.transpose(reshape_182, [0, 1, 3, 2, 4, 5]) + del reshape_182 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x2x2x7x7x768xf32, 4xi64) + reshape_183 = paddle._C_ops.reshape(transpose_79, full_int_array_38) + del transpose_79 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x7x7x768xf32, 3xi64) + reshape_184 = paddle._C_ops.reshape(reshape_183, full_int_array_39) + del reshape_183 + + # pd_op.shape64: (3xi64) <- (-1x49x768xf32) + shape64_52 = paddle._C_ops.shape64(reshape_184) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_88 = paddle._C_ops.slice( + shape64_52, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_52 + + # pd_op.matmul: (-1x49x2304xf32) <- (-1x49x768xf32, 768x2304xf32) + matmul_74 = paddle._C_ops.matmul(reshape_184, parameter_148, False, False) + del parameter_148, reshape_184 + + # pd_op.add: (-1x49x2304xf32) <- (-1x49x2304xf32, 2304xf32) + add_92 = paddle._C_ops.add(matmul_74, parameter_147) + del matmul_74, parameter_147 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_82 = [slice_88, full_4, full_5, full_32, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_80 = paddle._C_ops.stack(combine_82, 0) + del combine_82 + + # pd_op.reshape: (-1x49x3x24x32xf32) <- (-1x49x2304xf32, 5xi64) + reshape_185 = paddle._C_ops.reshape(add_92, stack_80) + del add_92, stack_80 + + # pd_op.transpose: (3x-1x24x49x32xf32) <- (-1x49x3x24x32xf32) + transpose_80 = paddle._C_ops.transpose(reshape_185, [2, 0, 3, 1, 4]) + del reshape_185 + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_89 = paddle._C_ops.slice( + transpose_80, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_90 = paddle._C_ops.slice( + transpose_80, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_91 = paddle._C_ops.slice( + transpose_80, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_80 + + # pd_op.scale: (-1x24x49x32xf32) <- (-1x24x49x32xf32, 1xf32) + scale_12 = paddle._C_ops.scale(slice_89, full_8, float("0"), True) + del slice_89 + + # pd_op.transpose: (-1x24x32x49xf32) <- (-1x24x49x32xf32) + transpose_81 = paddle._C_ops.transpose(slice_90, [0, 1, 3, 2]) + del slice_90 + + # pd_op.matmul: (-1x24x49x49xf32) <- (-1x24x49x32xf32, -1x24x32x49xf32) + matmul_75 = paddle._C_ops.matmul(scale_12, transpose_81, False, False) + del scale_12, transpose_81 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_186 = paddle._C_ops.reshape(data_25, full_int_array_7) + del data_25 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_12 = paddle._C_ops.index_select(data_26, reshape_186, 0) + del data_26, reshape_186 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_187 = paddle._C_ops.reshape(index_select_12, full_int_array_8) + del index_select_12 + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_82 = paddle._C_ops.transpose(reshape_187, [2, 0, 1]) + del reshape_187 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_36 = paddle._C_ops.unsqueeze(transpose_82, full_int_array_0) + del transpose_82 + + # pd_op.add: (-1x24x49x49xf32) <- (-1x24x49x49xf32, 1x24x49x49xf32) + add_93 = paddle._C_ops.add(matmul_75, unsqueeze_36) + del matmul_75, unsqueeze_36 + + # pd_op.softmax: (-1x24x49x49xf32) <- (-1x24x49x49xf32) + softmax_12 = paddle._C_ops.softmax(add_93, -1) + del add_93 + + # pd_op.matmul: (-1x24x49x32xf32) <- (-1x24x49x49xf32, -1x24x49x32xf32) + matmul_76 = paddle._C_ops.matmul(softmax_12, slice_91, False, False) + del slice_91, softmax_12 + + # pd_op.transpose: (-1x49x24x32xf32) <- (-1x24x49x32xf32) + transpose_83 = paddle._C_ops.transpose(matmul_76, [0, 2, 1, 3]) + del matmul_76 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_83 = [slice_88, full_4, full_18] + del slice_88 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_81 = paddle._C_ops.stack(combine_83, 0) + del combine_83 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x49x24x32xf32, 3xi64) + reshape_188 = paddle._C_ops.reshape(transpose_83, stack_81) + del stack_81, transpose_83 + + # pd_op.matmul: (-1x49x768xf32) <- (-1x49x768xf32, 768x768xf32) + matmul_77 = paddle._C_ops.matmul(reshape_188, parameter_146, False, False) + del parameter_146, reshape_188 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, 768xf32) + add_94 = paddle._C_ops.add(matmul_77, parameter_145) + del matmul_77, parameter_145 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x49x768xf32, 4xi64) + reshape_189 = paddle._C_ops.reshape(add_94, full_int_array_38) + del add_94 + + # pd_op.reshape: (-1x2x2x7x7x768xf32) <- (-1x7x7x768xf32, 6xi64) + reshape_190 = paddle._C_ops.reshape(reshape_189, full_int_array_40) + del reshape_189 + + # pd_op.transpose: (-1x2x7x2x7x768xf32) <- (-1x2x2x7x7x768xf32) + transpose_84 = paddle._C_ops.transpose(reshape_190, [0, 1, 3, 2, 4, 5]) + del reshape_190 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x2x7x2x7x768xf32, 4xi64) + reshape_191 = paddle._C_ops.reshape(transpose_84, full_int_array_41) + del transpose_84 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_84 = [slice_86, full_33, full_18] + del slice_86 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_82 = paddle._C_ops.stack(combine_84, 0) + del combine_84 + + # pd_op.reshape: (-1x196x768xf32) <- (-1x14x14x768xf32, 3xi64) + reshape_192 = paddle._C_ops.reshape(reshape_191, stack_82) + del reshape_191, stack_82 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_95 = paddle._C_ops.add(add_91, reshape_192) + del add_91, reshape_192 + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_84, layer_norm_85, layer_norm_86 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_95, parameter_144, parameter_143, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_143, parameter_144 + + # pd_op.matmul: (-1x196x3072xf32) <- (-1x196x768xf32, 768x3072xf32) + matmul_78 = paddle._C_ops.matmul(layer_norm_84, parameter_142, False, False) + del layer_norm_84, parameter_142 + + # pd_op.add: (-1x196x3072xf32) <- (-1x196x3072xf32, 3072xf32) + add_96 = paddle._C_ops.add(matmul_78, parameter_141) + del matmul_78, parameter_141 + + # pd_op.gelu: (-1x196x3072xf32) <- (-1x196x3072xf32) + gelu_12 = paddle._C_ops.gelu(add_96, False) + del add_96 + + # pd_op.matmul: (-1x196x768xf32) <- (-1x196x3072xf32, 3072x768xf32) + matmul_79 = paddle._C_ops.matmul(gelu_12, parameter_140, False, False) + del gelu_12, parameter_140 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, 768xf32) + add_97 = paddle._C_ops.add(matmul_79, parameter_139) + del matmul_79, parameter_139 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_98 = paddle._C_ops.add(add_95, add_97) + del add_95, add_97 + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_53 = paddle._C_ops.shape64(add_98) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_92 = paddle._C_ops.slice( + shape64_53, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_53 + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_87, layer_norm_88, layer_norm_89 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_98, parameter_138, parameter_137, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_137, parameter_138 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_85 = [slice_92, full_30, full_30, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_83 = paddle._C_ops.stack(combine_85, 0) + del combine_85 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x196x768xf32, 4xi64) + reshape_193 = paddle._C_ops.reshape(layer_norm_87, stack_83) + del layer_norm_87, stack_83 + + # pd_op.shape64: (4xi64) <- (-1x14x14x768xf32) + shape64_54 = paddle._C_ops.shape64(reshape_193) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_93 = paddle._C_ops.slice( + shape64_54, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_54 + + # pd_op.roll: (-1x14x14x768xf32) <- (-1x14x14x768xf32, 2xi64) + roll_12 = paddle._C_ops.roll(reshape_193, full_int_array_11, [1, 2]) + del reshape_193 + + # pd_op.shape64: (4xi64) <- (-1x14x14x768xf32) + shape64_55 = paddle._C_ops.shape64(roll_12) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_94 = paddle._C_ops.slice( + shape64_55, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_55 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_86 = [slice_94, full_31, full_3, full_31, full_3, full_18] + del slice_94 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_84 = paddle._C_ops.stack(combine_86, 0) + del combine_86 + + # pd_op.reshape: (-1x2x7x2x7x768xf32) <- (-1x14x14x768xf32, 6xi64) + reshape_194 = paddle._C_ops.reshape(roll_12, stack_84) + del roll_12, stack_84 + + # pd_op.transpose: (-1x2x2x7x7x768xf32) <- (-1x2x7x2x7x768xf32) + transpose_85 = paddle._C_ops.transpose(reshape_194, [0, 1, 3, 2, 4, 5]) + del reshape_194 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x2x2x7x7x768xf32, 4xi64) + reshape_195 = paddle._C_ops.reshape(transpose_85, full_int_array_38) + del transpose_85 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x7x7x768xf32, 3xi64) + reshape_196 = paddle._C_ops.reshape(reshape_195, full_int_array_39) + del reshape_195 + + # pd_op.full: (1x14x14x1xf32) <- () + full_41 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__60 = paddle._C_ops.set_value_( + full_41, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_41 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__61 = paddle._C_ops.set_value_( + set_value__60, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__60 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__62 = paddle._C_ops.set_value_( + set_value__61, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__61 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__63 = paddle._C_ops.set_value_( + set_value__62, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__62 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__64 = paddle._C_ops.set_value_( + set_value__63, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__63 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__65 = paddle._C_ops.set_value_( + set_value__64, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__64 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__66 = paddle._C_ops.set_value_( + set_value__65, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__65 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__67 = paddle._C_ops.set_value_( + set_value__66, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__66 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__6 = paddle._C_ops.set_value_( + set_value__67, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__67 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_197 = paddle._C_ops.reshape(set_value__6, full_int_array_42) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_86 = paddle._C_ops.transpose(reshape_197, [0, 1, 3, 2, 4, 5]) + del reshape_197 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_198 = paddle._C_ops.reshape(transpose_86, full_int_array_27) + del transpose_86 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_199 = paddle._C_ops.reshape(reshape_198, full_int_array_28) + del reshape_198 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_37 = paddle._C_ops.unsqueeze(reshape_199, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_38 = paddle._C_ops.unsqueeze(reshape_199, full_int_array_5) + del reshape_199 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_6 = paddle._C_ops.subtract(unsqueeze_37, unsqueeze_38) + del unsqueeze_37, unsqueeze_38 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_6 = paddle._C_ops.not_equal(subtract_6, full_11) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_12 = paddle._C_ops.where(not_equal_6, full_35, subtract_6) + del not_equal_6, subtract_6 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_6 = paddle._C_ops.equal(where_12, full_11) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_13 = paddle._C_ops.where(equal_6, full_36, where_12) + del equal_6, where_12 + + # pd_op.shape64: (3xi64) <- (-1x49x768xf32) + shape64_56 = paddle._C_ops.shape64(reshape_196) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_95 = paddle._C_ops.slice( + shape64_56, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_56 + + # pd_op.matmul: (-1x49x2304xf32) <- (-1x49x768xf32, 768x2304xf32) + matmul_80 = paddle._C_ops.matmul(reshape_196, parameter_136, False, False) + del parameter_136, reshape_196 + + # pd_op.add: (-1x49x2304xf32) <- (-1x49x2304xf32, 2304xf32) + add_99 = paddle._C_ops.add(matmul_80, parameter_135) + del matmul_80, parameter_135 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_87 = [slice_95, full_4, full_5, full_32, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_85 = paddle._C_ops.stack(combine_87, 0) + del combine_87 + + # pd_op.reshape: (-1x49x3x24x32xf32) <- (-1x49x2304xf32, 5xi64) + reshape_200 = paddle._C_ops.reshape(add_99, stack_85) + del add_99, stack_85 + + # pd_op.transpose: (3x-1x24x49x32xf32) <- (-1x49x3x24x32xf32) + transpose_87 = paddle._C_ops.transpose(reshape_200, [2, 0, 3, 1, 4]) + del reshape_200 + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_96 = paddle._C_ops.slice( + transpose_87, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_97 = paddle._C_ops.slice( + transpose_87, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_98 = paddle._C_ops.slice( + transpose_87, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_87 + + # pd_op.scale: (-1x24x49x32xf32) <- (-1x24x49x32xf32, 1xf32) + scale_13 = paddle._C_ops.scale(slice_96, full_8, float("0"), True) + del slice_96 + + # pd_op.transpose: (-1x24x32x49xf32) <- (-1x24x49x32xf32) + transpose_88 = paddle._C_ops.transpose(slice_97, [0, 1, 3, 2]) + del slice_97 + + # pd_op.matmul: (-1x24x49x49xf32) <- (-1x24x49x32xf32, -1x24x32x49xf32) + matmul_81 = paddle._C_ops.matmul(scale_13, transpose_88, False, False) + del scale_13, transpose_88 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_201 = paddle._C_ops.reshape(data_27, full_int_array_7) + del data_27 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_13 = paddle._C_ops.index_select(data_28, reshape_201, 0) + del data_28, reshape_201 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_202 = paddle._C_ops.reshape(index_select_13, full_int_array_8) + del index_select_13 + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_89 = paddle._C_ops.transpose(reshape_202, [2, 0, 1]) + del reshape_202 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_39 = paddle._C_ops.unsqueeze(transpose_89, full_int_array_0) + del transpose_89 + + # pd_op.add: (-1x24x49x49xf32) <- (-1x24x49x49xf32, 1x24x49x49xf32) + add_100 = paddle._C_ops.add(matmul_81, unsqueeze_39) + del matmul_81, unsqueeze_39 + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_6 = paddle._C_ops.floor_divide(slice_95, full_37) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_88 = [floor_divide_6, full_21, full_32, full_4, full_4] + del floor_divide_6 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_86 = paddle._C_ops.stack(combine_88, 0) + del combine_88 + + # pd_op.reshape: (-1x4x24x49x49xf32) <- (-1x24x49x49xf32, 5xi64) + reshape_203 = paddle._C_ops.reshape(add_100, stack_86) + del add_100, stack_86 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_40 = paddle._C_ops.unsqueeze(where_13, full_int_array_1) + del where_13 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_41 = paddle._C_ops.unsqueeze(unsqueeze_40, full_int_array_0) + del unsqueeze_40 + + # pd_op.add: (-1x4x24x49x49xf32) <- (-1x4x24x49x49xf32, 1x4x1x49x49xf32) + add_101 = paddle._C_ops.add(reshape_203, unsqueeze_41) + del reshape_203, unsqueeze_41 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_89 = [slice_95, full_32, full_4, full_4] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_87 = paddle._C_ops.stack(combine_89, 0) + del combine_89 + + # pd_op.reshape: (-1x24x49x49xf32) <- (-1x4x24x49x49xf32, 4xi64) + reshape_204 = paddle._C_ops.reshape(add_101, stack_87) + del add_101, stack_87 + + # pd_op.softmax: (-1x24x49x49xf32) <- (-1x24x49x49xf32) + softmax_13 = paddle._C_ops.softmax(reshape_204, -1) + del reshape_204 + + # pd_op.matmul: (-1x24x49x32xf32) <- (-1x24x49x49xf32, -1x24x49x32xf32) + matmul_82 = paddle._C_ops.matmul(softmax_13, slice_98, False, False) + del slice_98, softmax_13 + + # pd_op.transpose: (-1x49x24x32xf32) <- (-1x24x49x32xf32) + transpose_90 = paddle._C_ops.transpose(matmul_82, [0, 2, 1, 3]) + del matmul_82 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_90 = [slice_95, full_4, full_18] + del slice_95 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_88 = paddle._C_ops.stack(combine_90, 0) + del combine_90 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x49x24x32xf32, 3xi64) + reshape_205 = paddle._C_ops.reshape(transpose_90, stack_88) + del stack_88, transpose_90 + + # pd_op.matmul: (-1x49x768xf32) <- (-1x49x768xf32, 768x768xf32) + matmul_83 = paddle._C_ops.matmul(reshape_205, parameter_134, False, False) + del parameter_134, reshape_205 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, 768xf32) + add_102 = paddle._C_ops.add(matmul_83, parameter_133) + del matmul_83, parameter_133 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x49x768xf32, 4xi64) + reshape_206 = paddle._C_ops.reshape(add_102, full_int_array_38) + del add_102 + + # pd_op.reshape: (-1x2x2x7x7x768xf32) <- (-1x7x7x768xf32, 6xi64) + reshape_207 = paddle._C_ops.reshape(reshape_206, full_int_array_40) + del reshape_206 + + # pd_op.transpose: (-1x2x7x2x7x768xf32) <- (-1x2x2x7x7x768xf32) + transpose_91 = paddle._C_ops.transpose(reshape_207, [0, 1, 3, 2, 4, 5]) + del reshape_207 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x2x7x2x7x768xf32, 4xi64) + reshape_208 = paddle._C_ops.reshape(transpose_91, full_int_array_41) + del transpose_91 + + # pd_op.roll: (-1x14x14x768xf32) <- (-1x14x14x768xf32, 2xi64) + roll_13 = paddle._C_ops.roll(reshape_208, full_int_array_29, [1, 2]) + del reshape_208 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_91 = [slice_92, full_33, full_18] + del slice_92 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_89 = paddle._C_ops.stack(combine_91, 0) + del combine_91 + + # pd_op.reshape: (-1x196x768xf32) <- (-1x14x14x768xf32, 3xi64) + reshape_209 = paddle._C_ops.reshape(roll_13, stack_89) + del roll_13, stack_89 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_103 = paddle._C_ops.add(add_98, reshape_209) + del add_98, reshape_209 + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_90, layer_norm_91, layer_norm_92 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_103, parameter_132, parameter_131, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_131, parameter_132 + + # pd_op.matmul: (-1x196x3072xf32) <- (-1x196x768xf32, 768x3072xf32) + matmul_84 = paddle._C_ops.matmul(layer_norm_90, parameter_130, False, False) + del layer_norm_90, parameter_130 + + # pd_op.add: (-1x196x3072xf32) <- (-1x196x3072xf32, 3072xf32) + add_104 = paddle._C_ops.add(matmul_84, parameter_129) + del matmul_84, parameter_129 + + # pd_op.gelu: (-1x196x3072xf32) <- (-1x196x3072xf32) + gelu_13 = paddle._C_ops.gelu(add_104, False) + del add_104 + + # pd_op.matmul: (-1x196x768xf32) <- (-1x196x3072xf32, 3072x768xf32) + matmul_85 = paddle._C_ops.matmul(gelu_13, parameter_128, False, False) + del gelu_13, parameter_128 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, 768xf32) + add_105 = paddle._C_ops.add(matmul_85, parameter_127) + del matmul_85, parameter_127 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_106 = paddle._C_ops.add(add_103, add_105) + del add_103, add_105 + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_57 = paddle._C_ops.shape64(add_106) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_99 = paddle._C_ops.slice( + shape64_57, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_57 + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_93, layer_norm_94, layer_norm_95 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_106, parameter_126, parameter_125, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_125, parameter_126 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_92 = [slice_99, full_30, full_30, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_90 = paddle._C_ops.stack(combine_92, 0) + del combine_92 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x196x768xf32, 4xi64) + reshape_210 = paddle._C_ops.reshape(layer_norm_93, stack_90) + del layer_norm_93, stack_90 + + # pd_op.shape64: (4xi64) <- (-1x14x14x768xf32) + shape64_58 = paddle._C_ops.shape64(reshape_210) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_100 = paddle._C_ops.slice( + shape64_58, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_58 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_93 = [slice_100, full_31, full_3, full_31, full_3, full_18] + del slice_100 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_91 = paddle._C_ops.stack(combine_93, 0) + del combine_93 + + # pd_op.reshape: (-1x2x7x2x7x768xf32) <- (-1x14x14x768xf32, 6xi64) + reshape_211 = paddle._C_ops.reshape(reshape_210, stack_91) + del reshape_210, stack_91 + + # pd_op.transpose: (-1x2x2x7x7x768xf32) <- (-1x2x7x2x7x768xf32) + transpose_92 = paddle._C_ops.transpose(reshape_211, [0, 1, 3, 2, 4, 5]) + del reshape_211 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x2x2x7x7x768xf32, 4xi64) + reshape_212 = paddle._C_ops.reshape(transpose_92, full_int_array_38) + del transpose_92 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x7x7x768xf32, 3xi64) + reshape_213 = paddle._C_ops.reshape(reshape_212, full_int_array_39) + del reshape_212 + + # pd_op.shape64: (3xi64) <- (-1x49x768xf32) + shape64_59 = paddle._C_ops.shape64(reshape_213) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_101 = paddle._C_ops.slice( + shape64_59, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_59 + + # pd_op.matmul: (-1x49x2304xf32) <- (-1x49x768xf32, 768x2304xf32) + matmul_86 = paddle._C_ops.matmul(reshape_213, parameter_124, False, False) + del parameter_124, reshape_213 + + # pd_op.add: (-1x49x2304xf32) <- (-1x49x2304xf32, 2304xf32) + add_107 = paddle._C_ops.add(matmul_86, parameter_123) + del matmul_86, parameter_123 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_94 = [slice_101, full_4, full_5, full_32, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_92 = paddle._C_ops.stack(combine_94, 0) + del combine_94 + + # pd_op.reshape: (-1x49x3x24x32xf32) <- (-1x49x2304xf32, 5xi64) + reshape_214 = paddle._C_ops.reshape(add_107, stack_92) + del add_107, stack_92 + + # pd_op.transpose: (3x-1x24x49x32xf32) <- (-1x49x3x24x32xf32) + transpose_93 = paddle._C_ops.transpose(reshape_214, [2, 0, 3, 1, 4]) + del reshape_214 + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_102 = paddle._C_ops.slice( + transpose_93, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_103 = paddle._C_ops.slice( + transpose_93, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_104 = paddle._C_ops.slice( + transpose_93, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_93 + + # pd_op.scale: (-1x24x49x32xf32) <- (-1x24x49x32xf32, 1xf32) + scale_14 = paddle._C_ops.scale(slice_102, full_8, float("0"), True) + del slice_102 + + # pd_op.transpose: (-1x24x32x49xf32) <- (-1x24x49x32xf32) + transpose_94 = paddle._C_ops.transpose(slice_103, [0, 1, 3, 2]) + del slice_103 + + # pd_op.matmul: (-1x24x49x49xf32) <- (-1x24x49x32xf32, -1x24x32x49xf32) + matmul_87 = paddle._C_ops.matmul(scale_14, transpose_94, False, False) + del scale_14, transpose_94 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_215 = paddle._C_ops.reshape(data_29, full_int_array_7) + del data_29 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_14 = paddle._C_ops.index_select(data_30, reshape_215, 0) + del data_30, reshape_215 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_216 = paddle._C_ops.reshape(index_select_14, full_int_array_8) + del index_select_14 + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_95 = paddle._C_ops.transpose(reshape_216, [2, 0, 1]) + del reshape_216 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_42 = paddle._C_ops.unsqueeze(transpose_95, full_int_array_0) + del transpose_95 + + # pd_op.add: (-1x24x49x49xf32) <- (-1x24x49x49xf32, 1x24x49x49xf32) + add_108 = paddle._C_ops.add(matmul_87, unsqueeze_42) + del matmul_87, unsqueeze_42 + + # pd_op.softmax: (-1x24x49x49xf32) <- (-1x24x49x49xf32) + softmax_14 = paddle._C_ops.softmax(add_108, -1) + del add_108 + + # pd_op.matmul: (-1x24x49x32xf32) <- (-1x24x49x49xf32, -1x24x49x32xf32) + matmul_88 = paddle._C_ops.matmul(softmax_14, slice_104, False, False) + del slice_104, softmax_14 + + # pd_op.transpose: (-1x49x24x32xf32) <- (-1x24x49x32xf32) + transpose_96 = paddle._C_ops.transpose(matmul_88, [0, 2, 1, 3]) + del matmul_88 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_95 = [slice_101, full_4, full_18] + del slice_101 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_93 = paddle._C_ops.stack(combine_95, 0) + del combine_95 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x49x24x32xf32, 3xi64) + reshape_217 = paddle._C_ops.reshape(transpose_96, stack_93) + del stack_93, transpose_96 + + # pd_op.matmul: (-1x49x768xf32) <- (-1x49x768xf32, 768x768xf32) + matmul_89 = paddle._C_ops.matmul(reshape_217, parameter_122, False, False) + del parameter_122, reshape_217 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, 768xf32) + add_109 = paddle._C_ops.add(matmul_89, parameter_121) + del matmul_89, parameter_121 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x49x768xf32, 4xi64) + reshape_218 = paddle._C_ops.reshape(add_109, full_int_array_38) + del add_109 + + # pd_op.reshape: (-1x2x2x7x7x768xf32) <- (-1x7x7x768xf32, 6xi64) + reshape_219 = paddle._C_ops.reshape(reshape_218, full_int_array_40) + del reshape_218 + + # pd_op.transpose: (-1x2x7x2x7x768xf32) <- (-1x2x2x7x7x768xf32) + transpose_97 = paddle._C_ops.transpose(reshape_219, [0, 1, 3, 2, 4, 5]) + del reshape_219 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x2x7x2x7x768xf32, 4xi64) + reshape_220 = paddle._C_ops.reshape(transpose_97, full_int_array_41) + del transpose_97 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_96 = [slice_99, full_33, full_18] + del slice_99 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_94 = paddle._C_ops.stack(combine_96, 0) + del combine_96 + + # pd_op.reshape: (-1x196x768xf32) <- (-1x14x14x768xf32, 3xi64) + reshape_221 = paddle._C_ops.reshape(reshape_220, stack_94) + del reshape_220, stack_94 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_110 = paddle._C_ops.add(add_106, reshape_221) + del add_106, reshape_221 + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_96, layer_norm_97, layer_norm_98 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_110, parameter_120, parameter_119, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_119, parameter_120 + + # pd_op.matmul: (-1x196x3072xf32) <- (-1x196x768xf32, 768x3072xf32) + matmul_90 = paddle._C_ops.matmul(layer_norm_96, parameter_118, False, False) + del layer_norm_96, parameter_118 + + # pd_op.add: (-1x196x3072xf32) <- (-1x196x3072xf32, 3072xf32) + add_111 = paddle._C_ops.add(matmul_90, parameter_117) + del matmul_90, parameter_117 + + # pd_op.gelu: (-1x196x3072xf32) <- (-1x196x3072xf32) + gelu_14 = paddle._C_ops.gelu(add_111, False) + del add_111 + + # pd_op.matmul: (-1x196x768xf32) <- (-1x196x3072xf32, 3072x768xf32) + matmul_91 = paddle._C_ops.matmul(gelu_14, parameter_116, False, False) + del gelu_14, parameter_116 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, 768xf32) + add_112 = paddle._C_ops.add(matmul_91, parameter_115) + del matmul_91, parameter_115 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_113 = paddle._C_ops.add(add_110, add_112) + del add_110, add_112 + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_60 = paddle._C_ops.shape64(add_113) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_105 = paddle._C_ops.slice( + shape64_60, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_60 + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_99, layer_norm_100, layer_norm_101 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_113, parameter_114, parameter_113, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_113, parameter_114 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_97 = [slice_105, full_30, full_30, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_95 = paddle._C_ops.stack(combine_97, 0) + del combine_97 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x196x768xf32, 4xi64) + reshape_222 = paddle._C_ops.reshape(layer_norm_99, stack_95) + del layer_norm_99, stack_95 + + # pd_op.shape64: (4xi64) <- (-1x14x14x768xf32) + shape64_61 = paddle._C_ops.shape64(reshape_222) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_106 = paddle._C_ops.slice( + shape64_61, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_61 + + # pd_op.roll: (-1x14x14x768xf32) <- (-1x14x14x768xf32, 2xi64) + roll_14 = paddle._C_ops.roll(reshape_222, full_int_array_11, [1, 2]) + del reshape_222 + + # pd_op.shape64: (4xi64) <- (-1x14x14x768xf32) + shape64_62 = paddle._C_ops.shape64(roll_14) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_107 = paddle._C_ops.slice( + shape64_62, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_62 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_98 = [slice_107, full_31, full_3, full_31, full_3, full_18] + del slice_107 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_96 = paddle._C_ops.stack(combine_98, 0) + del combine_98 + + # pd_op.reshape: (-1x2x7x2x7x768xf32) <- (-1x14x14x768xf32, 6xi64) + reshape_223 = paddle._C_ops.reshape(roll_14, stack_96) + del roll_14, stack_96 + + # pd_op.transpose: (-1x2x2x7x7x768xf32) <- (-1x2x7x2x7x768xf32) + transpose_98 = paddle._C_ops.transpose(reshape_223, [0, 1, 3, 2, 4, 5]) + del reshape_223 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x2x2x7x7x768xf32, 4xi64) + reshape_224 = paddle._C_ops.reshape(transpose_98, full_int_array_38) + del transpose_98 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x7x7x768xf32, 3xi64) + reshape_225 = paddle._C_ops.reshape(reshape_224, full_int_array_39) + del reshape_224 + + # pd_op.full: (1x14x14x1xf32) <- () + full_42 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__68 = paddle._C_ops.set_value_( + full_42, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_42 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__69 = paddle._C_ops.set_value_( + set_value__68, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__68 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__70 = paddle._C_ops.set_value_( + set_value__69, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__69 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__71 = paddle._C_ops.set_value_( + set_value__70, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__70 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__72 = paddle._C_ops.set_value_( + set_value__71, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__71 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__73 = paddle._C_ops.set_value_( + set_value__72, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__72 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__74 = paddle._C_ops.set_value_( + set_value__73, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__73 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__75 = paddle._C_ops.set_value_( + set_value__74, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__74 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__7 = paddle._C_ops.set_value_( + set_value__75, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__75 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_226 = paddle._C_ops.reshape(set_value__7, full_int_array_42) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_99 = paddle._C_ops.transpose(reshape_226, [0, 1, 3, 2, 4, 5]) + del reshape_226 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_227 = paddle._C_ops.reshape(transpose_99, full_int_array_27) + del transpose_99 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_228 = paddle._C_ops.reshape(reshape_227, full_int_array_28) + del reshape_227 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_43 = paddle._C_ops.unsqueeze(reshape_228, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_44 = paddle._C_ops.unsqueeze(reshape_228, full_int_array_5) + del reshape_228 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_7 = paddle._C_ops.subtract(unsqueeze_43, unsqueeze_44) + del unsqueeze_43, unsqueeze_44 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_7 = paddle._C_ops.not_equal(subtract_7, full_11) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_14 = paddle._C_ops.where(not_equal_7, full_35, subtract_7) + del not_equal_7, subtract_7 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_7 = paddle._C_ops.equal(where_14, full_11) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_15 = paddle._C_ops.where(equal_7, full_36, where_14) + del equal_7, where_14 + + # pd_op.shape64: (3xi64) <- (-1x49x768xf32) + shape64_63 = paddle._C_ops.shape64(reshape_225) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_108 = paddle._C_ops.slice( + shape64_63, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_63 + + # pd_op.matmul: (-1x49x2304xf32) <- (-1x49x768xf32, 768x2304xf32) + matmul_92 = paddle._C_ops.matmul(reshape_225, parameter_112, False, False) + del parameter_112, reshape_225 + + # pd_op.add: (-1x49x2304xf32) <- (-1x49x2304xf32, 2304xf32) + add_114 = paddle._C_ops.add(matmul_92, parameter_111) + del matmul_92, parameter_111 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_99 = [slice_108, full_4, full_5, full_32, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_97 = paddle._C_ops.stack(combine_99, 0) + del combine_99 + + # pd_op.reshape: (-1x49x3x24x32xf32) <- (-1x49x2304xf32, 5xi64) + reshape_229 = paddle._C_ops.reshape(add_114, stack_97) + del add_114, stack_97 + + # pd_op.transpose: (3x-1x24x49x32xf32) <- (-1x49x3x24x32xf32) + transpose_100 = paddle._C_ops.transpose(reshape_229, [2, 0, 3, 1, 4]) + del reshape_229 + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_109 = paddle._C_ops.slice( + transpose_100, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_110 = paddle._C_ops.slice( + transpose_100, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_111 = paddle._C_ops.slice( + transpose_100, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_100 + + # pd_op.scale: (-1x24x49x32xf32) <- (-1x24x49x32xf32, 1xf32) + scale_15 = paddle._C_ops.scale(slice_109, full_8, float("0"), True) + del slice_109 + + # pd_op.transpose: (-1x24x32x49xf32) <- (-1x24x49x32xf32) + transpose_101 = paddle._C_ops.transpose(slice_110, [0, 1, 3, 2]) + del slice_110 + + # pd_op.matmul: (-1x24x49x49xf32) <- (-1x24x49x32xf32, -1x24x32x49xf32) + matmul_93 = paddle._C_ops.matmul(scale_15, transpose_101, False, False) + del scale_15, transpose_101 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_230 = paddle._C_ops.reshape(data_31, full_int_array_7) + del data_31 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_15 = paddle._C_ops.index_select(data_32, reshape_230, 0) + del data_32, reshape_230 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_231 = paddle._C_ops.reshape(index_select_15, full_int_array_8) + del index_select_15 + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_102 = paddle._C_ops.transpose(reshape_231, [2, 0, 1]) + del reshape_231 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_45 = paddle._C_ops.unsqueeze(transpose_102, full_int_array_0) + del transpose_102 + + # pd_op.add: (-1x24x49x49xf32) <- (-1x24x49x49xf32, 1x24x49x49xf32) + add_115 = paddle._C_ops.add(matmul_93, unsqueeze_45) + del matmul_93, unsqueeze_45 + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_7 = paddle._C_ops.floor_divide(slice_108, full_37) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_100 = [floor_divide_7, full_21, full_32, full_4, full_4] + del floor_divide_7 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_98 = paddle._C_ops.stack(combine_100, 0) + del combine_100 + + # pd_op.reshape: (-1x4x24x49x49xf32) <- (-1x24x49x49xf32, 5xi64) + reshape_232 = paddle._C_ops.reshape(add_115, stack_98) + del add_115, stack_98 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_46 = paddle._C_ops.unsqueeze(where_15, full_int_array_1) + del where_15 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_47 = paddle._C_ops.unsqueeze(unsqueeze_46, full_int_array_0) + del unsqueeze_46 + + # pd_op.add: (-1x4x24x49x49xf32) <- (-1x4x24x49x49xf32, 1x4x1x49x49xf32) + add_116 = paddle._C_ops.add(reshape_232, unsqueeze_47) + del reshape_232, unsqueeze_47 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_101 = [slice_108, full_32, full_4, full_4] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_99 = paddle._C_ops.stack(combine_101, 0) + del combine_101 + + # pd_op.reshape: (-1x24x49x49xf32) <- (-1x4x24x49x49xf32, 4xi64) + reshape_233 = paddle._C_ops.reshape(add_116, stack_99) + del add_116, stack_99 + + # pd_op.softmax: (-1x24x49x49xf32) <- (-1x24x49x49xf32) + softmax_15 = paddle._C_ops.softmax(reshape_233, -1) + del reshape_233 + + # pd_op.matmul: (-1x24x49x32xf32) <- (-1x24x49x49xf32, -1x24x49x32xf32) + matmul_94 = paddle._C_ops.matmul(softmax_15, slice_111, False, False) + del slice_111, softmax_15 + + # pd_op.transpose: (-1x49x24x32xf32) <- (-1x24x49x32xf32) + transpose_103 = paddle._C_ops.transpose(matmul_94, [0, 2, 1, 3]) + del matmul_94 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_102 = [slice_108, full_4, full_18] + del slice_108 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_100 = paddle._C_ops.stack(combine_102, 0) + del combine_102 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x49x24x32xf32, 3xi64) + reshape_234 = paddle._C_ops.reshape(transpose_103, stack_100) + del stack_100, transpose_103 + + # pd_op.matmul: (-1x49x768xf32) <- (-1x49x768xf32, 768x768xf32) + matmul_95 = paddle._C_ops.matmul(reshape_234, parameter_110, False, False) + del parameter_110, reshape_234 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, 768xf32) + add_117 = paddle._C_ops.add(matmul_95, parameter_109) + del matmul_95, parameter_109 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x49x768xf32, 4xi64) + reshape_235 = paddle._C_ops.reshape(add_117, full_int_array_38) + del add_117 + + # pd_op.reshape: (-1x2x2x7x7x768xf32) <- (-1x7x7x768xf32, 6xi64) + reshape_236 = paddle._C_ops.reshape(reshape_235, full_int_array_40) + del reshape_235 + + # pd_op.transpose: (-1x2x7x2x7x768xf32) <- (-1x2x2x7x7x768xf32) + transpose_104 = paddle._C_ops.transpose(reshape_236, [0, 1, 3, 2, 4, 5]) + del reshape_236 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x2x7x2x7x768xf32, 4xi64) + reshape_237 = paddle._C_ops.reshape(transpose_104, full_int_array_41) + del transpose_104 + + # pd_op.roll: (-1x14x14x768xf32) <- (-1x14x14x768xf32, 2xi64) + roll_15 = paddle._C_ops.roll(reshape_237, full_int_array_29, [1, 2]) + del reshape_237 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_103 = [slice_105, full_33, full_18] + del slice_105 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_101 = paddle._C_ops.stack(combine_103, 0) + del combine_103 + + # pd_op.reshape: (-1x196x768xf32) <- (-1x14x14x768xf32, 3xi64) + reshape_238 = paddle._C_ops.reshape(roll_15, stack_101) + del roll_15, stack_101 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_118 = paddle._C_ops.add(add_113, reshape_238) + del add_113, reshape_238 + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_102, layer_norm_103, layer_norm_104 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_118, parameter_108, parameter_107, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_107, parameter_108 + + # pd_op.matmul: (-1x196x3072xf32) <- (-1x196x768xf32, 768x3072xf32) + matmul_96 = paddle._C_ops.matmul(layer_norm_102, parameter_106, False, False) + del layer_norm_102, parameter_106 + + # pd_op.add: (-1x196x3072xf32) <- (-1x196x3072xf32, 3072xf32) + add_119 = paddle._C_ops.add(matmul_96, parameter_105) + del matmul_96, parameter_105 + + # pd_op.gelu: (-1x196x3072xf32) <- (-1x196x3072xf32) + gelu_15 = paddle._C_ops.gelu(add_119, False) + del add_119 + + # pd_op.matmul: (-1x196x768xf32) <- (-1x196x3072xf32, 3072x768xf32) + matmul_97 = paddle._C_ops.matmul(gelu_15, parameter_104, False, False) + del gelu_15, parameter_104 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, 768xf32) + add_120 = paddle._C_ops.add(matmul_97, parameter_103) + del matmul_97, parameter_103 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_121 = paddle._C_ops.add(add_118, add_120) + del add_118, add_120 + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_64 = paddle._C_ops.shape64(add_121) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_112 = paddle._C_ops.slice( + shape64_64, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_64 + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_105, layer_norm_106, layer_norm_107 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_121, parameter_102, parameter_101, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_101, parameter_102 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_104 = [slice_112, full_30, full_30, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_102 = paddle._C_ops.stack(combine_104, 0) + del combine_104 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x196x768xf32, 4xi64) + reshape_239 = paddle._C_ops.reshape(layer_norm_105, stack_102) + del layer_norm_105, stack_102 + + # pd_op.shape64: (4xi64) <- (-1x14x14x768xf32) + shape64_65 = paddle._C_ops.shape64(reshape_239) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_113 = paddle._C_ops.slice( + shape64_65, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_65 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_105 = [slice_113, full_31, full_3, full_31, full_3, full_18] + del slice_113 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_103 = paddle._C_ops.stack(combine_105, 0) + del combine_105 + + # pd_op.reshape: (-1x2x7x2x7x768xf32) <- (-1x14x14x768xf32, 6xi64) + reshape_240 = paddle._C_ops.reshape(reshape_239, stack_103) + del reshape_239, stack_103 + + # pd_op.transpose: (-1x2x2x7x7x768xf32) <- (-1x2x7x2x7x768xf32) + transpose_105 = paddle._C_ops.transpose(reshape_240, [0, 1, 3, 2, 4, 5]) + del reshape_240 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x2x2x7x7x768xf32, 4xi64) + reshape_241 = paddle._C_ops.reshape(transpose_105, full_int_array_38) + del transpose_105 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x7x7x768xf32, 3xi64) + reshape_242 = paddle._C_ops.reshape(reshape_241, full_int_array_39) + del reshape_241 + + # pd_op.shape64: (3xi64) <- (-1x49x768xf32) + shape64_66 = paddle._C_ops.shape64(reshape_242) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_114 = paddle._C_ops.slice( + shape64_66, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_66 + + # pd_op.matmul: (-1x49x2304xf32) <- (-1x49x768xf32, 768x2304xf32) + matmul_98 = paddle._C_ops.matmul(reshape_242, parameter_100, False, False) + del parameter_100, reshape_242 + + # pd_op.add: (-1x49x2304xf32) <- (-1x49x2304xf32, 2304xf32) + add_122 = paddle._C_ops.add(matmul_98, parameter_99) + del matmul_98, parameter_99 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_106 = [slice_114, full_4, full_5, full_32, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_104 = paddle._C_ops.stack(combine_106, 0) + del combine_106 + + # pd_op.reshape: (-1x49x3x24x32xf32) <- (-1x49x2304xf32, 5xi64) + reshape_243 = paddle._C_ops.reshape(add_122, stack_104) + del add_122, stack_104 + + # pd_op.transpose: (3x-1x24x49x32xf32) <- (-1x49x3x24x32xf32) + transpose_106 = paddle._C_ops.transpose(reshape_243, [2, 0, 3, 1, 4]) + del reshape_243 + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_115 = paddle._C_ops.slice( + transpose_106, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_116 = paddle._C_ops.slice( + transpose_106, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_117 = paddle._C_ops.slice( + transpose_106, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_106 + + # pd_op.scale: (-1x24x49x32xf32) <- (-1x24x49x32xf32, 1xf32) + scale_16 = paddle._C_ops.scale(slice_115, full_8, float("0"), True) + del slice_115 + + # pd_op.transpose: (-1x24x32x49xf32) <- (-1x24x49x32xf32) + transpose_107 = paddle._C_ops.transpose(slice_116, [0, 1, 3, 2]) + del slice_116 + + # pd_op.matmul: (-1x24x49x49xf32) <- (-1x24x49x32xf32, -1x24x32x49xf32) + matmul_99 = paddle._C_ops.matmul(scale_16, transpose_107, False, False) + del scale_16, transpose_107 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_244 = paddle._C_ops.reshape(data_33, full_int_array_7) + del data_33 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_16 = paddle._C_ops.index_select(data_34, reshape_244, 0) + del data_34, reshape_244 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_245 = paddle._C_ops.reshape(index_select_16, full_int_array_8) + del index_select_16 + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_108 = paddle._C_ops.transpose(reshape_245, [2, 0, 1]) + del reshape_245 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_48 = paddle._C_ops.unsqueeze(transpose_108, full_int_array_0) + del transpose_108 + + # pd_op.add: (-1x24x49x49xf32) <- (-1x24x49x49xf32, 1x24x49x49xf32) + add_123 = paddle._C_ops.add(matmul_99, unsqueeze_48) + del matmul_99, unsqueeze_48 + + # pd_op.softmax: (-1x24x49x49xf32) <- (-1x24x49x49xf32) + softmax_16 = paddle._C_ops.softmax(add_123, -1) + del add_123 + + # pd_op.matmul: (-1x24x49x32xf32) <- (-1x24x49x49xf32, -1x24x49x32xf32) + matmul_100 = paddle._C_ops.matmul(softmax_16, slice_117, False, False) + del slice_117, softmax_16 + + # pd_op.transpose: (-1x49x24x32xf32) <- (-1x24x49x32xf32) + transpose_109 = paddle._C_ops.transpose(matmul_100, [0, 2, 1, 3]) + del matmul_100 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_107 = [slice_114, full_4, full_18] + del slice_114 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_105 = paddle._C_ops.stack(combine_107, 0) + del combine_107 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x49x24x32xf32, 3xi64) + reshape_246 = paddle._C_ops.reshape(transpose_109, stack_105) + del stack_105, transpose_109 + + # pd_op.matmul: (-1x49x768xf32) <- (-1x49x768xf32, 768x768xf32) + matmul_101 = paddle._C_ops.matmul(reshape_246, parameter_98, False, False) + del parameter_98, reshape_246 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, 768xf32) + add_124 = paddle._C_ops.add(matmul_101, parameter_97) + del matmul_101, parameter_97 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x49x768xf32, 4xi64) + reshape_247 = paddle._C_ops.reshape(add_124, full_int_array_38) + del add_124 + + # pd_op.reshape: (-1x2x2x7x7x768xf32) <- (-1x7x7x768xf32, 6xi64) + reshape_248 = paddle._C_ops.reshape(reshape_247, full_int_array_40) + del reshape_247 + + # pd_op.transpose: (-1x2x7x2x7x768xf32) <- (-1x2x2x7x7x768xf32) + transpose_110 = paddle._C_ops.transpose(reshape_248, [0, 1, 3, 2, 4, 5]) + del reshape_248 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x2x7x2x7x768xf32, 4xi64) + reshape_249 = paddle._C_ops.reshape(transpose_110, full_int_array_41) + del transpose_110 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_108 = [slice_112, full_33, full_18] + del slice_112 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_106 = paddle._C_ops.stack(combine_108, 0) + del combine_108 + + # pd_op.reshape: (-1x196x768xf32) <- (-1x14x14x768xf32, 3xi64) + reshape_250 = paddle._C_ops.reshape(reshape_249, stack_106) + del reshape_249, stack_106 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_125 = paddle._C_ops.add(add_121, reshape_250) + del add_121, reshape_250 + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_108, layer_norm_109, layer_norm_110 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_125, parameter_96, parameter_95, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_95, parameter_96 + + # pd_op.matmul: (-1x196x3072xf32) <- (-1x196x768xf32, 768x3072xf32) + matmul_102 = paddle._C_ops.matmul(layer_norm_108, parameter_94, False, False) + del layer_norm_108, parameter_94 + + # pd_op.add: (-1x196x3072xf32) <- (-1x196x3072xf32, 3072xf32) + add_126 = paddle._C_ops.add(matmul_102, parameter_93) + del matmul_102, parameter_93 + + # pd_op.gelu: (-1x196x3072xf32) <- (-1x196x3072xf32) + gelu_16 = paddle._C_ops.gelu(add_126, False) + del add_126 + + # pd_op.matmul: (-1x196x768xf32) <- (-1x196x3072xf32, 3072x768xf32) + matmul_103 = paddle._C_ops.matmul(gelu_16, parameter_92, False, False) + del gelu_16, parameter_92 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, 768xf32) + add_127 = paddle._C_ops.add(matmul_103, parameter_91) + del matmul_103, parameter_91 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_128 = paddle._C_ops.add(add_125, add_127) + del add_125, add_127 + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_67 = paddle._C_ops.shape64(add_128) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_118 = paddle._C_ops.slice( + shape64_67, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_67 + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_111, layer_norm_112, layer_norm_113 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_128, parameter_90, parameter_89, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_89, parameter_90 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_109 = [slice_118, full_30, full_30, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_107 = paddle._C_ops.stack(combine_109, 0) + del combine_109 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x196x768xf32, 4xi64) + reshape_251 = paddle._C_ops.reshape(layer_norm_111, stack_107) + del layer_norm_111, stack_107 + + # pd_op.shape64: (4xi64) <- (-1x14x14x768xf32) + shape64_68 = paddle._C_ops.shape64(reshape_251) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_119 = paddle._C_ops.slice( + shape64_68, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_68 + + # pd_op.roll: (-1x14x14x768xf32) <- (-1x14x14x768xf32, 2xi64) + roll_16 = paddle._C_ops.roll(reshape_251, full_int_array_11, [1, 2]) + del reshape_251 + + # pd_op.shape64: (4xi64) <- (-1x14x14x768xf32) + shape64_69 = paddle._C_ops.shape64(roll_16) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_120 = paddle._C_ops.slice( + shape64_69, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_69 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_110 = [slice_120, full_31, full_3, full_31, full_3, full_18] + del slice_120 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_108 = paddle._C_ops.stack(combine_110, 0) + del combine_110 + + # pd_op.reshape: (-1x2x7x2x7x768xf32) <- (-1x14x14x768xf32, 6xi64) + reshape_252 = paddle._C_ops.reshape(roll_16, stack_108) + del roll_16, stack_108 + + # pd_op.transpose: (-1x2x2x7x7x768xf32) <- (-1x2x7x2x7x768xf32) + transpose_111 = paddle._C_ops.transpose(reshape_252, [0, 1, 3, 2, 4, 5]) + del reshape_252 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x2x2x7x7x768xf32, 4xi64) + reshape_253 = paddle._C_ops.reshape(transpose_111, full_int_array_38) + del transpose_111 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x7x7x768xf32, 3xi64) + reshape_254 = paddle._C_ops.reshape(reshape_253, full_int_array_39) + del reshape_253 + + # pd_op.full: (1x14x14x1xf32) <- () + full_43 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__76 = paddle._C_ops.set_value_( + full_43, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_43 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__77 = paddle._C_ops.set_value_( + set_value__76, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__76 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__78 = paddle._C_ops.set_value_( + set_value__77, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__77 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__79 = paddle._C_ops.set_value_( + set_value__78, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__78 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__80 = paddle._C_ops.set_value_( + set_value__79, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__79 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__81 = paddle._C_ops.set_value_( + set_value__80, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__80 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__82 = paddle._C_ops.set_value_( + set_value__81, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__81 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__83 = paddle._C_ops.set_value_( + set_value__82, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__82 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__8 = paddle._C_ops.set_value_( + set_value__83, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__83 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_255 = paddle._C_ops.reshape(set_value__8, full_int_array_42) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_112 = paddle._C_ops.transpose(reshape_255, [0, 1, 3, 2, 4, 5]) + del reshape_255 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_256 = paddle._C_ops.reshape(transpose_112, full_int_array_27) + del transpose_112 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_257 = paddle._C_ops.reshape(reshape_256, full_int_array_28) + del reshape_256 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_49 = paddle._C_ops.unsqueeze(reshape_257, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_50 = paddle._C_ops.unsqueeze(reshape_257, full_int_array_5) + del reshape_257 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_8 = paddle._C_ops.subtract(unsqueeze_49, unsqueeze_50) + del unsqueeze_49, unsqueeze_50 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_8 = paddle._C_ops.not_equal(subtract_8, full_11) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_16 = paddle._C_ops.where(not_equal_8, full_35, subtract_8) + del not_equal_8, subtract_8 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_8 = paddle._C_ops.equal(where_16, full_11) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_17 = paddle._C_ops.where(equal_8, full_36, where_16) + del equal_8, where_16 + + # pd_op.shape64: (3xi64) <- (-1x49x768xf32) + shape64_70 = paddle._C_ops.shape64(reshape_254) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_121 = paddle._C_ops.slice( + shape64_70, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_70 + + # pd_op.matmul: (-1x49x2304xf32) <- (-1x49x768xf32, 768x2304xf32) + matmul_104 = paddle._C_ops.matmul(reshape_254, parameter_88, False, False) + del parameter_88, reshape_254 + + # pd_op.add: (-1x49x2304xf32) <- (-1x49x2304xf32, 2304xf32) + add_129 = paddle._C_ops.add(matmul_104, parameter_87) + del matmul_104, parameter_87 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_111 = [slice_121, full_4, full_5, full_32, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_109 = paddle._C_ops.stack(combine_111, 0) + del combine_111 + + # pd_op.reshape: (-1x49x3x24x32xf32) <- (-1x49x2304xf32, 5xi64) + reshape_258 = paddle._C_ops.reshape(add_129, stack_109) + del add_129, stack_109 + + # pd_op.transpose: (3x-1x24x49x32xf32) <- (-1x49x3x24x32xf32) + transpose_113 = paddle._C_ops.transpose(reshape_258, [2, 0, 3, 1, 4]) + del reshape_258 + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_122 = paddle._C_ops.slice( + transpose_113, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_123 = paddle._C_ops.slice( + transpose_113, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_124 = paddle._C_ops.slice( + transpose_113, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_113 + + # pd_op.scale: (-1x24x49x32xf32) <- (-1x24x49x32xf32, 1xf32) + scale_17 = paddle._C_ops.scale(slice_122, full_8, float("0"), True) + del slice_122 + + # pd_op.transpose: (-1x24x32x49xf32) <- (-1x24x49x32xf32) + transpose_114 = paddle._C_ops.transpose(slice_123, [0, 1, 3, 2]) + del slice_123 + + # pd_op.matmul: (-1x24x49x49xf32) <- (-1x24x49x32xf32, -1x24x32x49xf32) + matmul_105 = paddle._C_ops.matmul(scale_17, transpose_114, False, False) + del scale_17, transpose_114 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_259 = paddle._C_ops.reshape(data_35, full_int_array_7) + del data_35 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_17 = paddle._C_ops.index_select(data_36, reshape_259, 0) + del data_36, reshape_259 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_260 = paddle._C_ops.reshape(index_select_17, full_int_array_8) + del index_select_17 + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_115 = paddle._C_ops.transpose(reshape_260, [2, 0, 1]) + del reshape_260 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_51 = paddle._C_ops.unsqueeze(transpose_115, full_int_array_0) + del transpose_115 + + # pd_op.add: (-1x24x49x49xf32) <- (-1x24x49x49xf32, 1x24x49x49xf32) + add_130 = paddle._C_ops.add(matmul_105, unsqueeze_51) + del matmul_105, unsqueeze_51 + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_8 = paddle._C_ops.floor_divide(slice_121, full_37) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_112 = [floor_divide_8, full_21, full_32, full_4, full_4] + del floor_divide_8 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_110 = paddle._C_ops.stack(combine_112, 0) + del combine_112 + + # pd_op.reshape: (-1x4x24x49x49xf32) <- (-1x24x49x49xf32, 5xi64) + reshape_261 = paddle._C_ops.reshape(add_130, stack_110) + del add_130, stack_110 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_52 = paddle._C_ops.unsqueeze(where_17, full_int_array_1) + del where_17 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_53 = paddle._C_ops.unsqueeze(unsqueeze_52, full_int_array_0) + del unsqueeze_52 + + # pd_op.add: (-1x4x24x49x49xf32) <- (-1x4x24x49x49xf32, 1x4x1x49x49xf32) + add_131 = paddle._C_ops.add(reshape_261, unsqueeze_53) + del reshape_261, unsqueeze_53 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_113 = [slice_121, full_32, full_4, full_4] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_111 = paddle._C_ops.stack(combine_113, 0) + del combine_113 + + # pd_op.reshape: (-1x24x49x49xf32) <- (-1x4x24x49x49xf32, 4xi64) + reshape_262 = paddle._C_ops.reshape(add_131, stack_111) + del add_131, stack_111 + + # pd_op.softmax: (-1x24x49x49xf32) <- (-1x24x49x49xf32) + softmax_17 = paddle._C_ops.softmax(reshape_262, -1) + del reshape_262 + + # pd_op.matmul: (-1x24x49x32xf32) <- (-1x24x49x49xf32, -1x24x49x32xf32) + matmul_106 = paddle._C_ops.matmul(softmax_17, slice_124, False, False) + del slice_124, softmax_17 + + # pd_op.transpose: (-1x49x24x32xf32) <- (-1x24x49x32xf32) + transpose_116 = paddle._C_ops.transpose(matmul_106, [0, 2, 1, 3]) + del matmul_106 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_114 = [slice_121, full_4, full_18] + del slice_121 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_112 = paddle._C_ops.stack(combine_114, 0) + del combine_114 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x49x24x32xf32, 3xi64) + reshape_263 = paddle._C_ops.reshape(transpose_116, stack_112) + del stack_112, transpose_116 + + # pd_op.matmul: (-1x49x768xf32) <- (-1x49x768xf32, 768x768xf32) + matmul_107 = paddle._C_ops.matmul(reshape_263, parameter_86, False, False) + del parameter_86, reshape_263 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, 768xf32) + add_132 = paddle._C_ops.add(matmul_107, parameter_85) + del matmul_107, parameter_85 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x49x768xf32, 4xi64) + reshape_264 = paddle._C_ops.reshape(add_132, full_int_array_38) + del add_132 + + # pd_op.reshape: (-1x2x2x7x7x768xf32) <- (-1x7x7x768xf32, 6xi64) + reshape_265 = paddle._C_ops.reshape(reshape_264, full_int_array_40) + del reshape_264 + + # pd_op.transpose: (-1x2x7x2x7x768xf32) <- (-1x2x2x7x7x768xf32) + transpose_117 = paddle._C_ops.transpose(reshape_265, [0, 1, 3, 2, 4, 5]) + del reshape_265 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x2x7x2x7x768xf32, 4xi64) + reshape_266 = paddle._C_ops.reshape(transpose_117, full_int_array_41) + del transpose_117 + + # pd_op.roll: (-1x14x14x768xf32) <- (-1x14x14x768xf32, 2xi64) + roll_17 = paddle._C_ops.roll(reshape_266, full_int_array_29, [1, 2]) + del reshape_266 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_115 = [slice_118, full_33, full_18] + del slice_118 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_113 = paddle._C_ops.stack(combine_115, 0) + del combine_115 + + # pd_op.reshape: (-1x196x768xf32) <- (-1x14x14x768xf32, 3xi64) + reshape_267 = paddle._C_ops.reshape(roll_17, stack_113) + del roll_17, stack_113 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_133 = paddle._C_ops.add(add_128, reshape_267) + del add_128, reshape_267 + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_114, layer_norm_115, layer_norm_116 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_133, parameter_84, parameter_83, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_83, parameter_84 + + # pd_op.matmul: (-1x196x3072xf32) <- (-1x196x768xf32, 768x3072xf32) + matmul_108 = paddle._C_ops.matmul(layer_norm_114, parameter_82, False, False) + del layer_norm_114, parameter_82 + + # pd_op.add: (-1x196x3072xf32) <- (-1x196x3072xf32, 3072xf32) + add_134 = paddle._C_ops.add(matmul_108, parameter_81) + del matmul_108, parameter_81 + + # pd_op.gelu: (-1x196x3072xf32) <- (-1x196x3072xf32) + gelu_17 = paddle._C_ops.gelu(add_134, False) + del add_134 + + # pd_op.matmul: (-1x196x768xf32) <- (-1x196x3072xf32, 3072x768xf32) + matmul_109 = paddle._C_ops.matmul(gelu_17, parameter_80, False, False) + del gelu_17, parameter_80 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, 768xf32) + add_135 = paddle._C_ops.add(matmul_109, parameter_79) + del matmul_109, parameter_79 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_136 = paddle._C_ops.add(add_133, add_135) + del add_133, add_135 + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_71 = paddle._C_ops.shape64(add_136) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_125 = paddle._C_ops.slice( + shape64_71, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_71 + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_117, layer_norm_118, layer_norm_119 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_136, parameter_78, parameter_77, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_77, parameter_78 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_116 = [slice_125, full_30, full_30, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_114 = paddle._C_ops.stack(combine_116, 0) + del combine_116 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x196x768xf32, 4xi64) + reshape_268 = paddle._C_ops.reshape(layer_norm_117, stack_114) + del layer_norm_117, stack_114 + + # pd_op.shape64: (4xi64) <- (-1x14x14x768xf32) + shape64_72 = paddle._C_ops.shape64(reshape_268) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_126 = paddle._C_ops.slice( + shape64_72, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_72 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_117 = [slice_126, full_31, full_3, full_31, full_3, full_18] + del slice_126 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_115 = paddle._C_ops.stack(combine_117, 0) + del combine_117 + + # pd_op.reshape: (-1x2x7x2x7x768xf32) <- (-1x14x14x768xf32, 6xi64) + reshape_269 = paddle._C_ops.reshape(reshape_268, stack_115) + del reshape_268, stack_115 + + # pd_op.transpose: (-1x2x2x7x7x768xf32) <- (-1x2x7x2x7x768xf32) + transpose_118 = paddle._C_ops.transpose(reshape_269, [0, 1, 3, 2, 4, 5]) + del reshape_269 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x2x2x7x7x768xf32, 4xi64) + reshape_270 = paddle._C_ops.reshape(transpose_118, full_int_array_38) + del transpose_118 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x7x7x768xf32, 3xi64) + reshape_271 = paddle._C_ops.reshape(reshape_270, full_int_array_39) + del reshape_270 + + # pd_op.shape64: (3xi64) <- (-1x49x768xf32) + shape64_73 = paddle._C_ops.shape64(reshape_271) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_127 = paddle._C_ops.slice( + shape64_73, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_73 + + # pd_op.matmul: (-1x49x2304xf32) <- (-1x49x768xf32, 768x2304xf32) + matmul_110 = paddle._C_ops.matmul(reshape_271, parameter_76, False, False) + del parameter_76, reshape_271 + + # pd_op.add: (-1x49x2304xf32) <- (-1x49x2304xf32, 2304xf32) + add_137 = paddle._C_ops.add(matmul_110, parameter_75) + del matmul_110, parameter_75 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_118 = [slice_127, full_4, full_5, full_32, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_116 = paddle._C_ops.stack(combine_118, 0) + del combine_118 + + # pd_op.reshape: (-1x49x3x24x32xf32) <- (-1x49x2304xf32, 5xi64) + reshape_272 = paddle._C_ops.reshape(add_137, stack_116) + del add_137, stack_116 + + # pd_op.transpose: (3x-1x24x49x32xf32) <- (-1x49x3x24x32xf32) + transpose_119 = paddle._C_ops.transpose(reshape_272, [2, 0, 3, 1, 4]) + del reshape_272 + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_128 = paddle._C_ops.slice( + transpose_119, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_129 = paddle._C_ops.slice( + transpose_119, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_130 = paddle._C_ops.slice( + transpose_119, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_119 + + # pd_op.scale: (-1x24x49x32xf32) <- (-1x24x49x32xf32, 1xf32) + scale_18 = paddle._C_ops.scale(slice_128, full_8, float("0"), True) + del slice_128 + + # pd_op.transpose: (-1x24x32x49xf32) <- (-1x24x49x32xf32) + transpose_120 = paddle._C_ops.transpose(slice_129, [0, 1, 3, 2]) + del slice_129 + + # pd_op.matmul: (-1x24x49x49xf32) <- (-1x24x49x32xf32, -1x24x32x49xf32) + matmul_111 = paddle._C_ops.matmul(scale_18, transpose_120, False, False) + del scale_18, transpose_120 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_273 = paddle._C_ops.reshape(data_37, full_int_array_7) + del data_37 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_18 = paddle._C_ops.index_select(data_38, reshape_273, 0) + del data_38, reshape_273 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_274 = paddle._C_ops.reshape(index_select_18, full_int_array_8) + del index_select_18 + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_121 = paddle._C_ops.transpose(reshape_274, [2, 0, 1]) + del reshape_274 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_54 = paddle._C_ops.unsqueeze(transpose_121, full_int_array_0) + del transpose_121 + + # pd_op.add: (-1x24x49x49xf32) <- (-1x24x49x49xf32, 1x24x49x49xf32) + add_138 = paddle._C_ops.add(matmul_111, unsqueeze_54) + del matmul_111, unsqueeze_54 + + # pd_op.softmax: (-1x24x49x49xf32) <- (-1x24x49x49xf32) + softmax_18 = paddle._C_ops.softmax(add_138, -1) + del add_138 + + # pd_op.matmul: (-1x24x49x32xf32) <- (-1x24x49x49xf32, -1x24x49x32xf32) + matmul_112 = paddle._C_ops.matmul(softmax_18, slice_130, False, False) + del slice_130, softmax_18 + + # pd_op.transpose: (-1x49x24x32xf32) <- (-1x24x49x32xf32) + transpose_122 = paddle._C_ops.transpose(matmul_112, [0, 2, 1, 3]) + del matmul_112 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_119 = [slice_127, full_4, full_18] + del slice_127 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_117 = paddle._C_ops.stack(combine_119, 0) + del combine_119 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x49x24x32xf32, 3xi64) + reshape_275 = paddle._C_ops.reshape(transpose_122, stack_117) + del stack_117, transpose_122 + + # pd_op.matmul: (-1x49x768xf32) <- (-1x49x768xf32, 768x768xf32) + matmul_113 = paddle._C_ops.matmul(reshape_275, parameter_74, False, False) + del parameter_74, reshape_275 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, 768xf32) + add_139 = paddle._C_ops.add(matmul_113, parameter_73) + del matmul_113, parameter_73 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x49x768xf32, 4xi64) + reshape_276 = paddle._C_ops.reshape(add_139, full_int_array_38) + del add_139 + + # pd_op.reshape: (-1x2x2x7x7x768xf32) <- (-1x7x7x768xf32, 6xi64) + reshape_277 = paddle._C_ops.reshape(reshape_276, full_int_array_40) + del reshape_276 + + # pd_op.transpose: (-1x2x7x2x7x768xf32) <- (-1x2x2x7x7x768xf32) + transpose_123 = paddle._C_ops.transpose(reshape_277, [0, 1, 3, 2, 4, 5]) + del reshape_277 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x2x7x2x7x768xf32, 4xi64) + reshape_278 = paddle._C_ops.reshape(transpose_123, full_int_array_41) + del transpose_123 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_120 = [slice_125, full_33, full_18] + del slice_125 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_118 = paddle._C_ops.stack(combine_120, 0) + del combine_120 + + # pd_op.reshape: (-1x196x768xf32) <- (-1x14x14x768xf32, 3xi64) + reshape_279 = paddle._C_ops.reshape(reshape_278, stack_118) + del reshape_278, stack_118 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_140 = paddle._C_ops.add(add_136, reshape_279) + del add_136, reshape_279 + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_120, layer_norm_121, layer_norm_122 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_140, parameter_72, parameter_71, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_71, parameter_72 + + # pd_op.matmul: (-1x196x3072xf32) <- (-1x196x768xf32, 768x3072xf32) + matmul_114 = paddle._C_ops.matmul(layer_norm_120, parameter_70, False, False) + del layer_norm_120, parameter_70 + + # pd_op.add: (-1x196x3072xf32) <- (-1x196x3072xf32, 3072xf32) + add_141 = paddle._C_ops.add(matmul_114, parameter_69) + del matmul_114, parameter_69 + + # pd_op.gelu: (-1x196x3072xf32) <- (-1x196x3072xf32) + gelu_18 = paddle._C_ops.gelu(add_141, False) + del add_141 + + # pd_op.matmul: (-1x196x768xf32) <- (-1x196x3072xf32, 3072x768xf32) + matmul_115 = paddle._C_ops.matmul(gelu_18, parameter_68, False, False) + del gelu_18, parameter_68 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, 768xf32) + add_142 = paddle._C_ops.add(matmul_115, parameter_67) + del matmul_115, parameter_67 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_143 = paddle._C_ops.add(add_140, add_142) + del add_140, add_142 + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_74 = paddle._C_ops.shape64(add_143) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_131 = paddle._C_ops.slice( + shape64_74, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_74 + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_123, layer_norm_124, layer_norm_125 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_143, parameter_66, parameter_65, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_65, parameter_66 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_121 = [slice_131, full_30, full_30, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_119 = paddle._C_ops.stack(combine_121, 0) + del combine_121 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x196x768xf32, 4xi64) + reshape_280 = paddle._C_ops.reshape(layer_norm_123, stack_119) + del layer_norm_123, stack_119 + + # pd_op.shape64: (4xi64) <- (-1x14x14x768xf32) + shape64_75 = paddle._C_ops.shape64(reshape_280) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_132 = paddle._C_ops.slice( + shape64_75, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_75 + + # pd_op.roll: (-1x14x14x768xf32) <- (-1x14x14x768xf32, 2xi64) + roll_18 = paddle._C_ops.roll(reshape_280, full_int_array_11, [1, 2]) + del reshape_280 + + # pd_op.shape64: (4xi64) <- (-1x14x14x768xf32) + shape64_76 = paddle._C_ops.shape64(roll_18) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_133 = paddle._C_ops.slice( + shape64_76, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_76 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_122 = [slice_133, full_31, full_3, full_31, full_3, full_18] + del slice_133 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_120 = paddle._C_ops.stack(combine_122, 0) + del combine_122 + + # pd_op.reshape: (-1x2x7x2x7x768xf32) <- (-1x14x14x768xf32, 6xi64) + reshape_281 = paddle._C_ops.reshape(roll_18, stack_120) + del roll_18, stack_120 + + # pd_op.transpose: (-1x2x2x7x7x768xf32) <- (-1x2x7x2x7x768xf32) + transpose_124 = paddle._C_ops.transpose(reshape_281, [0, 1, 3, 2, 4, 5]) + del reshape_281 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x2x2x7x7x768xf32, 4xi64) + reshape_282 = paddle._C_ops.reshape(transpose_124, full_int_array_38) + del transpose_124 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x7x7x768xf32, 3xi64) + reshape_283 = paddle._C_ops.reshape(reshape_282, full_int_array_39) + del reshape_282 + + # pd_op.full: (1x14x14x1xf32) <- () + full_44 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__84 = paddle._C_ops.set_value_( + full_44, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_44 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__85 = paddle._C_ops.set_value_( + set_value__84, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__84 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__86 = paddle._C_ops.set_value_( + set_value__85, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__85 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__87 = paddle._C_ops.set_value_( + set_value__86, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__86 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__88 = paddle._C_ops.set_value_( + set_value__87, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__87 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__89 = paddle._C_ops.set_value_( + set_value__88, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__88 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__90 = paddle._C_ops.set_value_( + set_value__89, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__89 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__91 = paddle._C_ops.set_value_( + set_value__90, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__90 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__9 = paddle._C_ops.set_value_( + set_value__91, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__91 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_284 = paddle._C_ops.reshape(set_value__9, full_int_array_42) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_125 = paddle._C_ops.transpose(reshape_284, [0, 1, 3, 2, 4, 5]) + del reshape_284 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_285 = paddle._C_ops.reshape(transpose_125, full_int_array_27) + del transpose_125 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_286 = paddle._C_ops.reshape(reshape_285, full_int_array_28) + del reshape_285 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_55 = paddle._C_ops.unsqueeze(reshape_286, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_56 = paddle._C_ops.unsqueeze(reshape_286, full_int_array_5) + del reshape_286 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_9 = paddle._C_ops.subtract(unsqueeze_55, unsqueeze_56) + del unsqueeze_55, unsqueeze_56 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_9 = paddle._C_ops.not_equal(subtract_9, full_11) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_18 = paddle._C_ops.where(not_equal_9, full_35, subtract_9) + del not_equal_9, subtract_9 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_9 = paddle._C_ops.equal(where_18, full_11) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_19 = paddle._C_ops.where(equal_9, full_36, where_18) + del equal_9, where_18 + + # pd_op.shape64: (3xi64) <- (-1x49x768xf32) + shape64_77 = paddle._C_ops.shape64(reshape_283) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_134 = paddle._C_ops.slice( + shape64_77, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_77 + + # pd_op.matmul: (-1x49x2304xf32) <- (-1x49x768xf32, 768x2304xf32) + matmul_116 = paddle._C_ops.matmul(reshape_283, parameter_64, False, False) + del parameter_64, reshape_283 + + # pd_op.add: (-1x49x2304xf32) <- (-1x49x2304xf32, 2304xf32) + add_144 = paddle._C_ops.add(matmul_116, parameter_63) + del matmul_116, parameter_63 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_123 = [slice_134, full_4, full_5, full_32, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_121 = paddle._C_ops.stack(combine_123, 0) + del combine_123 + + # pd_op.reshape: (-1x49x3x24x32xf32) <- (-1x49x2304xf32, 5xi64) + reshape_287 = paddle._C_ops.reshape(add_144, stack_121) + del add_144, stack_121 + + # pd_op.transpose: (3x-1x24x49x32xf32) <- (-1x49x3x24x32xf32) + transpose_126 = paddle._C_ops.transpose(reshape_287, [2, 0, 3, 1, 4]) + del reshape_287 + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_135 = paddle._C_ops.slice( + transpose_126, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_136 = paddle._C_ops.slice( + transpose_126, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_137 = paddle._C_ops.slice( + transpose_126, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_126 + + # pd_op.scale: (-1x24x49x32xf32) <- (-1x24x49x32xf32, 1xf32) + scale_19 = paddle._C_ops.scale(slice_135, full_8, float("0"), True) + del slice_135 + + # pd_op.transpose: (-1x24x32x49xf32) <- (-1x24x49x32xf32) + transpose_127 = paddle._C_ops.transpose(slice_136, [0, 1, 3, 2]) + del slice_136 + + # pd_op.matmul: (-1x24x49x49xf32) <- (-1x24x49x32xf32, -1x24x32x49xf32) + matmul_117 = paddle._C_ops.matmul(scale_19, transpose_127, False, False) + del scale_19, transpose_127 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_288 = paddle._C_ops.reshape(data_39, full_int_array_7) + del data_39 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_19 = paddle._C_ops.index_select(data_40, reshape_288, 0) + del data_40, reshape_288 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_289 = paddle._C_ops.reshape(index_select_19, full_int_array_8) + del index_select_19 + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_128 = paddle._C_ops.transpose(reshape_289, [2, 0, 1]) + del reshape_289 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_57 = paddle._C_ops.unsqueeze(transpose_128, full_int_array_0) + del transpose_128 + + # pd_op.add: (-1x24x49x49xf32) <- (-1x24x49x49xf32, 1x24x49x49xf32) + add_145 = paddle._C_ops.add(matmul_117, unsqueeze_57) + del matmul_117, unsqueeze_57 + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_9 = paddle._C_ops.floor_divide(slice_134, full_37) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_124 = [floor_divide_9, full_21, full_32, full_4, full_4] + del floor_divide_9 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_122 = paddle._C_ops.stack(combine_124, 0) + del combine_124 + + # pd_op.reshape: (-1x4x24x49x49xf32) <- (-1x24x49x49xf32, 5xi64) + reshape_290 = paddle._C_ops.reshape(add_145, stack_122) + del add_145, stack_122 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_58 = paddle._C_ops.unsqueeze(where_19, full_int_array_1) + del where_19 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_59 = paddle._C_ops.unsqueeze(unsqueeze_58, full_int_array_0) + del unsqueeze_58 + + # pd_op.add: (-1x4x24x49x49xf32) <- (-1x4x24x49x49xf32, 1x4x1x49x49xf32) + add_146 = paddle._C_ops.add(reshape_290, unsqueeze_59) + del reshape_290, unsqueeze_59 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_125 = [slice_134, full_32, full_4, full_4] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_123 = paddle._C_ops.stack(combine_125, 0) + del combine_125 + + # pd_op.reshape: (-1x24x49x49xf32) <- (-1x4x24x49x49xf32, 4xi64) + reshape_291 = paddle._C_ops.reshape(add_146, stack_123) + del add_146, stack_123 + + # pd_op.softmax: (-1x24x49x49xf32) <- (-1x24x49x49xf32) + softmax_19 = paddle._C_ops.softmax(reshape_291, -1) + del reshape_291 + + # pd_op.matmul: (-1x24x49x32xf32) <- (-1x24x49x49xf32, -1x24x49x32xf32) + matmul_118 = paddle._C_ops.matmul(softmax_19, slice_137, False, False) + del slice_137, softmax_19 + + # pd_op.transpose: (-1x49x24x32xf32) <- (-1x24x49x32xf32) + transpose_129 = paddle._C_ops.transpose(matmul_118, [0, 2, 1, 3]) + del matmul_118 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_126 = [slice_134, full_4, full_18] + del slice_134 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_124 = paddle._C_ops.stack(combine_126, 0) + del combine_126 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x49x24x32xf32, 3xi64) + reshape_292 = paddle._C_ops.reshape(transpose_129, stack_124) + del stack_124, transpose_129 + + # pd_op.matmul: (-1x49x768xf32) <- (-1x49x768xf32, 768x768xf32) + matmul_119 = paddle._C_ops.matmul(reshape_292, parameter_62, False, False) + del parameter_62, reshape_292 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, 768xf32) + add_147 = paddle._C_ops.add(matmul_119, parameter_61) + del matmul_119, parameter_61 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x49x768xf32, 4xi64) + reshape_293 = paddle._C_ops.reshape(add_147, full_int_array_38) + del add_147 + + # pd_op.reshape: (-1x2x2x7x7x768xf32) <- (-1x7x7x768xf32, 6xi64) + reshape_294 = paddle._C_ops.reshape(reshape_293, full_int_array_40) + del reshape_293 + + # pd_op.transpose: (-1x2x7x2x7x768xf32) <- (-1x2x2x7x7x768xf32) + transpose_130 = paddle._C_ops.transpose(reshape_294, [0, 1, 3, 2, 4, 5]) + del reshape_294 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x2x7x2x7x768xf32, 4xi64) + reshape_295 = paddle._C_ops.reshape(transpose_130, full_int_array_41) + del transpose_130 + + # pd_op.roll: (-1x14x14x768xf32) <- (-1x14x14x768xf32, 2xi64) + roll_19 = paddle._C_ops.roll(reshape_295, full_int_array_29, [1, 2]) + del reshape_295 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_127 = [slice_131, full_33, full_18] + del slice_131 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_125 = paddle._C_ops.stack(combine_127, 0) + del combine_127 + + # pd_op.reshape: (-1x196x768xf32) <- (-1x14x14x768xf32, 3xi64) + reshape_296 = paddle._C_ops.reshape(roll_19, stack_125) + del roll_19, stack_125 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_148 = paddle._C_ops.add(add_143, reshape_296) + del add_143, reshape_296 + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_126, layer_norm_127, layer_norm_128 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_148, parameter_60, parameter_59, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_59, parameter_60 + + # pd_op.matmul: (-1x196x3072xf32) <- (-1x196x768xf32, 768x3072xf32) + matmul_120 = paddle._C_ops.matmul(layer_norm_126, parameter_58, False, False) + del layer_norm_126, parameter_58 + + # pd_op.add: (-1x196x3072xf32) <- (-1x196x3072xf32, 3072xf32) + add_149 = paddle._C_ops.add(matmul_120, parameter_57) + del matmul_120, parameter_57 + + # pd_op.gelu: (-1x196x3072xf32) <- (-1x196x3072xf32) + gelu_19 = paddle._C_ops.gelu(add_149, False) + del add_149 + + # pd_op.matmul: (-1x196x768xf32) <- (-1x196x3072xf32, 3072x768xf32) + matmul_121 = paddle._C_ops.matmul(gelu_19, parameter_56, False, False) + del gelu_19, parameter_56 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, 768xf32) + add_150 = paddle._C_ops.add(matmul_121, parameter_55) + del matmul_121, parameter_55 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_151 = paddle._C_ops.add(add_148, add_150) + del add_148, add_150 + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_78 = paddle._C_ops.shape64(add_151) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_138 = paddle._C_ops.slice( + shape64_78, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_78 + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_129, layer_norm_130, layer_norm_131 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_151, parameter_54, parameter_53, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_53, parameter_54 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_128 = [slice_138, full_30, full_30, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_126 = paddle._C_ops.stack(combine_128, 0) + del combine_128 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x196x768xf32, 4xi64) + reshape_297 = paddle._C_ops.reshape(layer_norm_129, stack_126) + del layer_norm_129, stack_126 + + # pd_op.shape64: (4xi64) <- (-1x14x14x768xf32) + shape64_79 = paddle._C_ops.shape64(reshape_297) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_139 = paddle._C_ops.slice( + shape64_79, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_79 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_129 = [slice_139, full_31, full_3, full_31, full_3, full_18] + del slice_139 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_127 = paddle._C_ops.stack(combine_129, 0) + del combine_129 + + # pd_op.reshape: (-1x2x7x2x7x768xf32) <- (-1x14x14x768xf32, 6xi64) + reshape_298 = paddle._C_ops.reshape(reshape_297, stack_127) + del reshape_297, stack_127 + + # pd_op.transpose: (-1x2x2x7x7x768xf32) <- (-1x2x7x2x7x768xf32) + transpose_131 = paddle._C_ops.transpose(reshape_298, [0, 1, 3, 2, 4, 5]) + del reshape_298 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x2x2x7x7x768xf32, 4xi64) + reshape_299 = paddle._C_ops.reshape(transpose_131, full_int_array_38) + del transpose_131 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x7x7x768xf32, 3xi64) + reshape_300 = paddle._C_ops.reshape(reshape_299, full_int_array_39) + del reshape_299 + + # pd_op.shape64: (3xi64) <- (-1x49x768xf32) + shape64_80 = paddle._C_ops.shape64(reshape_300) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_140 = paddle._C_ops.slice( + shape64_80, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_80 + + # pd_op.matmul: (-1x49x2304xf32) <- (-1x49x768xf32, 768x2304xf32) + matmul_122 = paddle._C_ops.matmul(reshape_300, parameter_52, False, False) + del parameter_52, reshape_300 + + # pd_op.add: (-1x49x2304xf32) <- (-1x49x2304xf32, 2304xf32) + add_152 = paddle._C_ops.add(matmul_122, parameter_51) + del matmul_122, parameter_51 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_130 = [slice_140, full_4, full_5, full_32, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_128 = paddle._C_ops.stack(combine_130, 0) + del combine_130 + + # pd_op.reshape: (-1x49x3x24x32xf32) <- (-1x49x2304xf32, 5xi64) + reshape_301 = paddle._C_ops.reshape(add_152, stack_128) + del add_152, stack_128 + + # pd_op.transpose: (3x-1x24x49x32xf32) <- (-1x49x3x24x32xf32) + transpose_132 = paddle._C_ops.transpose(reshape_301, [2, 0, 3, 1, 4]) + del reshape_301 + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_141 = paddle._C_ops.slice( + transpose_132, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_142 = paddle._C_ops.slice( + transpose_132, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_143 = paddle._C_ops.slice( + transpose_132, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_132 + + # pd_op.scale: (-1x24x49x32xf32) <- (-1x24x49x32xf32, 1xf32) + scale_20 = paddle._C_ops.scale(slice_141, full_8, float("0"), True) + del slice_141 + + # pd_op.transpose: (-1x24x32x49xf32) <- (-1x24x49x32xf32) + transpose_133 = paddle._C_ops.transpose(slice_142, [0, 1, 3, 2]) + del slice_142 + + # pd_op.matmul: (-1x24x49x49xf32) <- (-1x24x49x32xf32, -1x24x32x49xf32) + matmul_123 = paddle._C_ops.matmul(scale_20, transpose_133, False, False) + del scale_20, transpose_133 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_302 = paddle._C_ops.reshape(data_41, full_int_array_7) + del data_41 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_20 = paddle._C_ops.index_select(data_42, reshape_302, 0) + del data_42, reshape_302 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_303 = paddle._C_ops.reshape(index_select_20, full_int_array_8) + del index_select_20 + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_134 = paddle._C_ops.transpose(reshape_303, [2, 0, 1]) + del reshape_303 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_60 = paddle._C_ops.unsqueeze(transpose_134, full_int_array_0) + del transpose_134 + + # pd_op.add: (-1x24x49x49xf32) <- (-1x24x49x49xf32, 1x24x49x49xf32) + add_153 = paddle._C_ops.add(matmul_123, unsqueeze_60) + del matmul_123, unsqueeze_60 + + # pd_op.softmax: (-1x24x49x49xf32) <- (-1x24x49x49xf32) + softmax_20 = paddle._C_ops.softmax(add_153, -1) + del add_153 + + # pd_op.matmul: (-1x24x49x32xf32) <- (-1x24x49x49xf32, -1x24x49x32xf32) + matmul_124 = paddle._C_ops.matmul(softmax_20, slice_143, False, False) + del slice_143, softmax_20 + + # pd_op.transpose: (-1x49x24x32xf32) <- (-1x24x49x32xf32) + transpose_135 = paddle._C_ops.transpose(matmul_124, [0, 2, 1, 3]) + del matmul_124 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_131 = [slice_140, full_4, full_18] + del slice_140 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_129 = paddle._C_ops.stack(combine_131, 0) + del combine_131 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x49x24x32xf32, 3xi64) + reshape_304 = paddle._C_ops.reshape(transpose_135, stack_129) + del stack_129, transpose_135 + + # pd_op.matmul: (-1x49x768xf32) <- (-1x49x768xf32, 768x768xf32) + matmul_125 = paddle._C_ops.matmul(reshape_304, parameter_50, False, False) + del parameter_50, reshape_304 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, 768xf32) + add_154 = paddle._C_ops.add(matmul_125, parameter_49) + del matmul_125, parameter_49 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x49x768xf32, 4xi64) + reshape_305 = paddle._C_ops.reshape(add_154, full_int_array_38) + del add_154 + + # pd_op.reshape: (-1x2x2x7x7x768xf32) <- (-1x7x7x768xf32, 6xi64) + reshape_306 = paddle._C_ops.reshape(reshape_305, full_int_array_40) + del reshape_305 + + # pd_op.transpose: (-1x2x7x2x7x768xf32) <- (-1x2x2x7x7x768xf32) + transpose_136 = paddle._C_ops.transpose(reshape_306, [0, 1, 3, 2, 4, 5]) + del reshape_306 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x2x7x2x7x768xf32, 4xi64) + reshape_307 = paddle._C_ops.reshape(transpose_136, full_int_array_41) + del transpose_136 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_132 = [slice_138, full_33, full_18] + del slice_138 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_130 = paddle._C_ops.stack(combine_132, 0) + del combine_132 + + # pd_op.reshape: (-1x196x768xf32) <- (-1x14x14x768xf32, 3xi64) + reshape_308 = paddle._C_ops.reshape(reshape_307, stack_130) + del reshape_307, stack_130 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_155 = paddle._C_ops.add(add_151, reshape_308) + del add_151, reshape_308 + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_132, layer_norm_133, layer_norm_134 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_155, parameter_48, parameter_47, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_47, parameter_48 + + # pd_op.matmul: (-1x196x3072xf32) <- (-1x196x768xf32, 768x3072xf32) + matmul_126 = paddle._C_ops.matmul(layer_norm_132, parameter_46, False, False) + del layer_norm_132, parameter_46 + + # pd_op.add: (-1x196x3072xf32) <- (-1x196x3072xf32, 3072xf32) + add_156 = paddle._C_ops.add(matmul_126, parameter_45) + del matmul_126, parameter_45 + + # pd_op.gelu: (-1x196x3072xf32) <- (-1x196x3072xf32) + gelu_20 = paddle._C_ops.gelu(add_156, False) + del add_156 + + # pd_op.matmul: (-1x196x768xf32) <- (-1x196x3072xf32, 3072x768xf32) + matmul_127 = paddle._C_ops.matmul(gelu_20, parameter_44, False, False) + del gelu_20, parameter_44 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, 768xf32) + add_157 = paddle._C_ops.add(matmul_127, parameter_43) + del matmul_127, parameter_43 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_158 = paddle._C_ops.add(add_155, add_157) + del add_155, add_157 + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_81 = paddle._C_ops.shape64(add_158) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_144 = paddle._C_ops.slice( + shape64_81, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_81 + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_135, layer_norm_136, layer_norm_137 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_158, parameter_42, parameter_41, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_41, parameter_42 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_133 = [slice_144, full_30, full_30, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_131 = paddle._C_ops.stack(combine_133, 0) + del combine_133 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x196x768xf32, 4xi64) + reshape_309 = paddle._C_ops.reshape(layer_norm_135, stack_131) + del layer_norm_135, stack_131 + + # pd_op.shape64: (4xi64) <- (-1x14x14x768xf32) + shape64_82 = paddle._C_ops.shape64(reshape_309) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_145 = paddle._C_ops.slice( + shape64_82, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_82 + + # pd_op.roll: (-1x14x14x768xf32) <- (-1x14x14x768xf32, 2xi64) + roll_20 = paddle._C_ops.roll(reshape_309, full_int_array_11, [1, 2]) + del reshape_309 + + # pd_op.shape64: (4xi64) <- (-1x14x14x768xf32) + shape64_83 = paddle._C_ops.shape64(roll_20) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_146 = paddle._C_ops.slice( + shape64_83, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_83 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_134 = [slice_146, full_31, full_3, full_31, full_3, full_18] + del full_31, slice_146 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_132 = paddle._C_ops.stack(combine_134, 0) + del combine_134 + + # pd_op.reshape: (-1x2x7x2x7x768xf32) <- (-1x14x14x768xf32, 6xi64) + reshape_310 = paddle._C_ops.reshape(roll_20, stack_132) + del roll_20, stack_132 + + # pd_op.transpose: (-1x2x2x7x7x768xf32) <- (-1x2x7x2x7x768xf32) + transpose_137 = paddle._C_ops.transpose(reshape_310, [0, 1, 3, 2, 4, 5]) + del reshape_310 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x2x2x7x7x768xf32, 4xi64) + reshape_311 = paddle._C_ops.reshape(transpose_137, full_int_array_38) + del transpose_137 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x7x7x768xf32, 3xi64) + reshape_312 = paddle._C_ops.reshape(reshape_311, full_int_array_39) + del full_int_array_39, reshape_311 + + # pd_op.full: (1x14x14x1xf32) <- () + full_45 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__92 = paddle._C_ops.set_value_( + full_45, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_45 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__93 = paddle._C_ops.set_value_( + set_value__92, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__92 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__94 = paddle._C_ops.set_value_( + set_value__93, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__93 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__95 = paddle._C_ops.set_value_( + set_value__94, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__94 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__96 = paddle._C_ops.set_value_( + set_value__95, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__95 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__97 = paddle._C_ops.set_value_( + set_value__96, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__96 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__98 = paddle._C_ops.set_value_( + set_value__97, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__97 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__99 = paddle._C_ops.set_value_( + set_value__98, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__98 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__10 = paddle._C_ops.set_value_( + set_value__99, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__99 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_313 = paddle._C_ops.reshape(set_value__10, full_int_array_42) + del full_int_array_42 + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_138 = paddle._C_ops.transpose(reshape_313, [0, 1, 3, 2, 4, 5]) + del reshape_313 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_314 = paddle._C_ops.reshape(transpose_138, full_int_array_27) + del transpose_138 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_315 = paddle._C_ops.reshape(reshape_314, full_int_array_28) + del reshape_314 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_61 = paddle._C_ops.unsqueeze(reshape_315, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_62 = paddle._C_ops.unsqueeze(reshape_315, full_int_array_5) + del reshape_315 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_10 = paddle._C_ops.subtract(unsqueeze_61, unsqueeze_62) + del unsqueeze_61, unsqueeze_62 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_10 = paddle._C_ops.not_equal(subtract_10, full_11) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_20 = paddle._C_ops.where(not_equal_10, full_35, subtract_10) + del full_35, not_equal_10, subtract_10 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_10 = paddle._C_ops.equal(where_20, full_11) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_21 = paddle._C_ops.where(equal_10, full_36, where_20) + del equal_10, full_36, where_20 + + # pd_op.shape64: (3xi64) <- (-1x49x768xf32) + shape64_84 = paddle._C_ops.shape64(reshape_312) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_147 = paddle._C_ops.slice( + shape64_84, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_84 + + # pd_op.matmul: (-1x49x2304xf32) <- (-1x49x768xf32, 768x2304xf32) + matmul_128 = paddle._C_ops.matmul(reshape_312, parameter_40, False, False) + del parameter_40, reshape_312 + + # pd_op.add: (-1x49x2304xf32) <- (-1x49x2304xf32, 2304xf32) + add_159 = paddle._C_ops.add(matmul_128, parameter_39) + del matmul_128, parameter_39 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_135 = [slice_147, full_4, full_5, full_32, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_133 = paddle._C_ops.stack(combine_135, 0) + del combine_135 + + # pd_op.reshape: (-1x49x3x24x32xf32) <- (-1x49x2304xf32, 5xi64) + reshape_316 = paddle._C_ops.reshape(add_159, stack_133) + del add_159, stack_133 + + # pd_op.transpose: (3x-1x24x49x32xf32) <- (-1x49x3x24x32xf32) + transpose_139 = paddle._C_ops.transpose(reshape_316, [2, 0, 3, 1, 4]) + del reshape_316 + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_148 = paddle._C_ops.slice( + transpose_139, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_149 = paddle._C_ops.slice( + transpose_139, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_150 = paddle._C_ops.slice( + transpose_139, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_139 + + # pd_op.scale: (-1x24x49x32xf32) <- (-1x24x49x32xf32, 1xf32) + scale_21 = paddle._C_ops.scale(slice_148, full_8, float("0"), True) + del slice_148 + + # pd_op.transpose: (-1x24x32x49xf32) <- (-1x24x49x32xf32) + transpose_140 = paddle._C_ops.transpose(slice_149, [0, 1, 3, 2]) + del slice_149 + + # pd_op.matmul: (-1x24x49x49xf32) <- (-1x24x49x32xf32, -1x24x32x49xf32) + matmul_129 = paddle._C_ops.matmul(scale_21, transpose_140, False, False) + del scale_21, transpose_140 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_317 = paddle._C_ops.reshape(data_43, full_int_array_7) + del data_43 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_21 = paddle._C_ops.index_select(data_44, reshape_317, 0) + del data_44, reshape_317 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_318 = paddle._C_ops.reshape(index_select_21, full_int_array_8) + del index_select_21 + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_141 = paddle._C_ops.transpose(reshape_318, [2, 0, 1]) + del reshape_318 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_63 = paddle._C_ops.unsqueeze(transpose_141, full_int_array_0) + del transpose_141 + + # pd_op.add: (-1x24x49x49xf32) <- (-1x24x49x49xf32, 1x24x49x49xf32) + add_160 = paddle._C_ops.add(matmul_129, unsqueeze_63) + del matmul_129, unsqueeze_63 + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_10 = paddle._C_ops.floor_divide(slice_147, full_37) + del full_37 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_136 = [floor_divide_10, full_21, full_32, full_4, full_4] + del floor_divide_10, full_21 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_134 = paddle._C_ops.stack(combine_136, 0) + del combine_136 + + # pd_op.reshape: (-1x4x24x49x49xf32) <- (-1x24x49x49xf32, 5xi64) + reshape_319 = paddle._C_ops.reshape(add_160, stack_134) + del add_160, stack_134 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_64 = paddle._C_ops.unsqueeze(where_21, full_int_array_1) + del where_21 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_65 = paddle._C_ops.unsqueeze(unsqueeze_64, full_int_array_0) + del unsqueeze_64 + + # pd_op.add: (-1x4x24x49x49xf32) <- (-1x4x24x49x49xf32, 1x4x1x49x49xf32) + add_161 = paddle._C_ops.add(reshape_319, unsqueeze_65) + del reshape_319, unsqueeze_65 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_137 = [slice_147, full_32, full_4, full_4] + del full_32 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_135 = paddle._C_ops.stack(combine_137, 0) + del combine_137 + + # pd_op.reshape: (-1x24x49x49xf32) <- (-1x4x24x49x49xf32, 4xi64) + reshape_320 = paddle._C_ops.reshape(add_161, stack_135) + del add_161, stack_135 + + # pd_op.softmax: (-1x24x49x49xf32) <- (-1x24x49x49xf32) + softmax_21 = paddle._C_ops.softmax(reshape_320, -1) + del reshape_320 + + # pd_op.matmul: (-1x24x49x32xf32) <- (-1x24x49x49xf32, -1x24x49x32xf32) + matmul_130 = paddle._C_ops.matmul(softmax_21, slice_150, False, False) + del slice_150, softmax_21 + + # pd_op.transpose: (-1x49x24x32xf32) <- (-1x24x49x32xf32) + transpose_142 = paddle._C_ops.transpose(matmul_130, [0, 2, 1, 3]) + del matmul_130 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_138 = [slice_147, full_4, full_18] + del slice_147 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_136 = paddle._C_ops.stack(combine_138, 0) + del combine_138 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x49x24x32xf32, 3xi64) + reshape_321 = paddle._C_ops.reshape(transpose_142, stack_136) + del stack_136, transpose_142 + + # pd_op.matmul: (-1x49x768xf32) <- (-1x49x768xf32, 768x768xf32) + matmul_131 = paddle._C_ops.matmul(reshape_321, parameter_38, False, False) + del parameter_38, reshape_321 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, 768xf32) + add_162 = paddle._C_ops.add(matmul_131, parameter_37) + del matmul_131, parameter_37 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x49x768xf32, 4xi64) + reshape_322 = paddle._C_ops.reshape(add_162, full_int_array_38) + del add_162, full_int_array_38 + + # pd_op.reshape: (-1x2x2x7x7x768xf32) <- (-1x7x7x768xf32, 6xi64) + reshape_323 = paddle._C_ops.reshape(reshape_322, full_int_array_40) + del full_int_array_40, reshape_322 + + # pd_op.transpose: (-1x2x7x2x7x768xf32) <- (-1x2x2x7x7x768xf32) + transpose_143 = paddle._C_ops.transpose(reshape_323, [0, 1, 3, 2, 4, 5]) + del reshape_323 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x2x7x2x7x768xf32, 4xi64) + reshape_324 = paddle._C_ops.reshape(transpose_143, full_int_array_41) + del full_int_array_41, transpose_143 + + # pd_op.roll: (-1x14x14x768xf32) <- (-1x14x14x768xf32, 2xi64) + roll_21 = paddle._C_ops.roll(reshape_324, full_int_array_29, [1, 2]) + del reshape_324 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_139 = [slice_144, full_33, full_18] + del full_33, slice_144 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_137 = paddle._C_ops.stack(combine_139, 0) + del combine_139 + + # pd_op.reshape: (-1x196x768xf32) <- (-1x14x14x768xf32, 3xi64) + reshape_325 = paddle._C_ops.reshape(roll_21, stack_137) + del roll_21, stack_137 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_163 = paddle._C_ops.add(add_158, reshape_325) + del add_158, reshape_325 + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_138, layer_norm_139, layer_norm_140 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_163, parameter_36, parameter_35, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_35, parameter_36 + + # pd_op.matmul: (-1x196x3072xf32) <- (-1x196x768xf32, 768x3072xf32) + matmul_132 = paddle._C_ops.matmul(layer_norm_138, parameter_34, False, False) + del layer_norm_138, parameter_34 + + # pd_op.add: (-1x196x3072xf32) <- (-1x196x3072xf32, 3072xf32) + add_164 = paddle._C_ops.add(matmul_132, parameter_33) + del matmul_132, parameter_33 + + # pd_op.gelu: (-1x196x3072xf32) <- (-1x196x3072xf32) + gelu_21 = paddle._C_ops.gelu(add_164, False) + del add_164 + + # pd_op.matmul: (-1x196x768xf32) <- (-1x196x3072xf32, 3072x768xf32) + matmul_133 = paddle._C_ops.matmul(gelu_21, parameter_32, False, False) + del gelu_21, parameter_32 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, 768xf32) + add_165 = paddle._C_ops.add(matmul_133, parameter_31) + del matmul_133, parameter_31 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_166 = paddle._C_ops.add(add_163, add_165) + del add_163, add_165 + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_85 = paddle._C_ops.shape64(add_166) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_151 = paddle._C_ops.slice( + shape64_85, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_85 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_140 = [slice_151, full_30, full_30, full_18] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_138 = paddle._C_ops.stack(combine_140, 0) + del combine_140 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x196x768xf32, 4xi64) + reshape_326 = paddle._C_ops.reshape(add_166, stack_138) + del add_166, stack_138 + + # pd_op.strided_slice: (-1x7x7x768xf32) <- (-1x14x14x768xf32, 2xi64, 2xi64, 2xi64) + strided_slice_8 = paddle._C_ops.strided_slice( + reshape_326, [1, 2], full_int_array_12, full_int_array_25, full_int_array_30 + ) + + # pd_op.strided_slice: (-1x7x7x768xf32) <- (-1x14x14x768xf32, 2xi64, 2xi64, 2xi64) + strided_slice_9 = paddle._C_ops.strided_slice( + reshape_326, [1, 2], full_int_array_31, full_int_array_25, full_int_array_30 + ) + del full_int_array_31 + + # pd_op.strided_slice: (-1x7x7x768xf32) <- (-1x14x14x768xf32, 2xi64, 2xi64, 2xi64) + strided_slice_10 = paddle._C_ops.strided_slice( + reshape_326, [1, 2], full_int_array_32, full_int_array_25, full_int_array_30 + ) + del full_int_array_32 + + # pd_op.strided_slice: (-1x7x7x768xf32) <- (-1x14x14x768xf32, 2xi64, 2xi64, 2xi64) + strided_slice_11 = paddle._C_ops.strided_slice( + reshape_326, [1, 2], full_int_array_14, full_int_array_25, full_int_array_30 + ) + del full_int_array_30 + + # pd_op.shape64: (4xi64) <- (-1x14x14x768xf32) + shape64_86 = paddle._C_ops.shape64(reshape_326) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_152 = paddle._C_ops.slice( + shape64_86, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_86 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_141 = [slice_152, full_30, full_30, full_18] + del full_18, full_30, slice_152 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_139 = paddle._C_ops.stack(combine_141, 0) + del combine_141 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x14x14x768xf32, 4xi64) + reshape_327 = paddle._C_ops.reshape(reshape_326, stack_139) + del reshape_326, stack_139 + + # builtin.combine: ([-1x7x7x768xf32, -1x7x7x768xf32, -1x7x7x768xf32, -1x7x7x768xf32]) <- (-1x7x7x768xf32, -1x7x7x768xf32, -1x7x7x768xf32, -1x7x7x768xf32) + combine_142 = [ + strided_slice_8, + strided_slice_9, + strided_slice_10, + strided_slice_11, + ] + del strided_slice_10, strided_slice_11, strided_slice_8, strided_slice_9 + + # pd_op.concat: (-1x7x7x3072xf32) <- ([-1x7x7x768xf32, -1x7x7x768xf32, -1x7x7x768xf32, -1x7x7x768xf32], 1xi32) + concat_2 = paddle._C_ops.concat(combine_142, full_16) + del combine_142, full_16 + + # pd_op.full: (xi64) <- () + full_46 = paddle._C_ops.full( + [], float("3072"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_143 = [slice_151, full_17, full_46] + del full_17, full_46, slice_151 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_140 = paddle._C_ops.stack(combine_143, 0) + del combine_143 + + # pd_op.reshape: (-1x-1x3072xf32) <- (-1x7x7x3072xf32, 3xi64) + reshape_328 = paddle._C_ops.reshape(concat_2, stack_140) + del concat_2, stack_140 + + # pd_op.layer_norm: (-1x-1x3072xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x3072xf32, 3072xf32, 3072xf32) + layer_norm_141, layer_norm_142, layer_norm_143 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + reshape_328, parameter_30, parameter_29, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_29, parameter_30, reshape_328 + + # pd_op.matmul: (-1x-1x1536xf32) <- (-1x-1x3072xf32, 3072x1536xf32) + matmul_134 = paddle._C_ops.matmul(layer_norm_141, parameter_28, False, False) + del layer_norm_141, parameter_28 + + # pd_op.shape64: (3xi64) <- (-1x-1x1536xf32) + shape64_87 = paddle._C_ops.shape64(matmul_134) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_153 = paddle._C_ops.slice( + shape64_87, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_87 + + # pd_op.shape64: (3xi64) <- (-1x-1x1536xf32) + shape64_88 = paddle._C_ops.shape64(matmul_134) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_154 = paddle._C_ops.slice( + shape64_88, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + del shape64_88 + + # pd_op.layer_norm: (-1x-1x1536xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x1536xf32, 1536xf32, 1536xf32) + layer_norm_144, layer_norm_145, layer_norm_146 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + matmul_134, parameter_27, parameter_26, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_26, parameter_27 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_144 = [slice_153, full_3, full_3, full_29] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_141 = paddle._C_ops.stack(combine_144, 0) + del combine_144 + + # pd_op.reshape: (-1x7x7x1536xf32) <- (-1x-1x1536xf32, 4xi64) + reshape_329 = paddle._C_ops.reshape(layer_norm_144, stack_141) + del layer_norm_144, stack_141 + + # pd_op.shape64: (4xi64) <- (-1x7x7x1536xf32) + shape64_89 = paddle._C_ops.shape64(reshape_329) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_155 = paddle._C_ops.slice( + shape64_89, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_89 + + # pd_op.full: (xi64) <- () + full_47 = paddle._C_ops.full( + [], float("1"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_145 = [slice_155, full_47, full_3, full_47, full_3, full_29] + del slice_155 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_142 = paddle._C_ops.stack(combine_145, 0) + del combine_145 + + # pd_op.reshape: (-1x1x7x1x7x1536xf32) <- (-1x7x7x1536xf32, 6xi64) + reshape_330 = paddle._C_ops.reshape(reshape_329, stack_142) + del reshape_329, stack_142 + + # pd_op.transpose: (-1x1x1x7x7x1536xf32) <- (-1x1x7x1x7x1536xf32) + transpose_144 = paddle._C_ops.transpose(reshape_330, [0, 1, 3, 2, 4, 5]) + del reshape_330 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_43 = [-1, 7, 7, 1536] + + # pd_op.reshape: (-1x7x7x1536xf32) <- (-1x1x1x7x7x1536xf32, 4xi64) + reshape_331 = paddle._C_ops.reshape(transpose_144, full_int_array_43) + del transpose_144 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_44 = [-1, 49, 1536] + + # pd_op.reshape: (-1x49x1536xf32) <- (-1x7x7x1536xf32, 3xi64) + reshape_332 = paddle._C_ops.reshape(reshape_331, full_int_array_44) + del reshape_331 + + # pd_op.shape64: (3xi64) <- (-1x49x1536xf32) + shape64_90 = paddle._C_ops.shape64(reshape_332) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_156 = paddle._C_ops.slice( + shape64_90, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_90 + + # pd_op.matmul: (-1x49x4608xf32) <- (-1x49x1536xf32, 1536x4608xf32) + matmul_135 = paddle._C_ops.matmul(reshape_332, parameter_25, False, False) + del parameter_25, reshape_332 + + # pd_op.add: (-1x49x4608xf32) <- (-1x49x4608xf32, 4608xf32) + add_167 = paddle._C_ops.add(matmul_135, parameter_24) + del matmul_135, parameter_24 + + # pd_op.full: (xi64) <- () + full_48 = paddle._C_ops.full( + [], float("48"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_146 = [slice_156, full_4, full_5, full_48, full_7] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_143 = paddle._C_ops.stack(combine_146, 0) + del combine_146 + + # pd_op.reshape: (-1x49x3x48x32xf32) <- (-1x49x4608xf32, 5xi64) + reshape_333 = paddle._C_ops.reshape(add_167, stack_143) + del add_167, stack_143 + + # pd_op.transpose: (3x-1x48x49x32xf32) <- (-1x49x3x48x32xf32) + transpose_145 = paddle._C_ops.transpose(reshape_333, [2, 0, 3, 1, 4]) + del reshape_333 + + # pd_op.slice: (-1x48x49x32xf32) <- (3x-1x48x49x32xf32, 1xi64, 1xi64) + slice_157 = paddle._C_ops.slice( + transpose_145, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x48x49x32xf32) <- (3x-1x48x49x32xf32, 1xi64, 1xi64) + slice_158 = paddle._C_ops.slice( + transpose_145, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x48x49x32xf32) <- (3x-1x48x49x32xf32, 1xi64, 1xi64) + slice_159 = paddle._C_ops.slice( + transpose_145, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_145 + + # pd_op.scale: (-1x48x49x32xf32) <- (-1x48x49x32xf32, 1xf32) + scale_22 = paddle._C_ops.scale(slice_157, full_8, float("0"), True) + del slice_157 + + # pd_op.transpose: (-1x48x32x49xf32) <- (-1x48x49x32xf32) + transpose_146 = paddle._C_ops.transpose(slice_158, [0, 1, 3, 2]) + del slice_158 + + # pd_op.matmul: (-1x48x49x49xf32) <- (-1x48x49x32xf32, -1x48x32x49xf32) + matmul_136 = paddle._C_ops.matmul(scale_22, transpose_146, False, False) + del scale_22, transpose_146 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_334 = paddle._C_ops.reshape(data_45, full_int_array_7) + del data_45 + + # pd_op.index_select: (2401x48xf32) <- (169x48xf32, 2401xi64) + index_select_22 = paddle._C_ops.index_select(data_46, reshape_334, 0) + del data_46, reshape_334 + + # pd_op.reshape: (49x49x48xf32) <- (2401x48xf32, 3xi64) + reshape_335 = paddle._C_ops.reshape(index_select_22, full_int_array_8) + del index_select_22 + + # pd_op.transpose: (48x49x49xf32) <- (49x49x48xf32) + transpose_147 = paddle._C_ops.transpose(reshape_335, [2, 0, 1]) + del reshape_335 + + # pd_op.unsqueeze: (1x48x49x49xf32) <- (48x49x49xf32, 1xi64) + unsqueeze_66 = paddle._C_ops.unsqueeze(transpose_147, full_int_array_0) + del transpose_147 + + # pd_op.add: (-1x48x49x49xf32) <- (-1x48x49x49xf32, 1x48x49x49xf32) + add_168 = paddle._C_ops.add(matmul_136, unsqueeze_66) + del matmul_136, unsqueeze_66 + + # pd_op.softmax: (-1x48x49x49xf32) <- (-1x48x49x49xf32) + softmax_22 = paddle._C_ops.softmax(add_168, -1) + del add_168 + + # pd_op.matmul: (-1x48x49x32xf32) <- (-1x48x49x49xf32, -1x48x49x32xf32) + matmul_137 = paddle._C_ops.matmul(softmax_22, slice_159, False, False) + del slice_159, softmax_22 + + # pd_op.transpose: (-1x49x48x32xf32) <- (-1x48x49x32xf32) + transpose_148 = paddle._C_ops.transpose(matmul_137, [0, 2, 1, 3]) + del matmul_137 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_147 = [slice_156, full_4, full_29] + del slice_156 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_144 = paddle._C_ops.stack(combine_147, 0) + del combine_147 + + # pd_op.reshape: (-1x49x1536xf32) <- (-1x49x48x32xf32, 3xi64) + reshape_336 = paddle._C_ops.reshape(transpose_148, stack_144) + del stack_144, transpose_148 + + # pd_op.matmul: (-1x49x1536xf32) <- (-1x49x1536xf32, 1536x1536xf32) + matmul_138 = paddle._C_ops.matmul(reshape_336, parameter_23, False, False) + del parameter_23, reshape_336 + + # pd_op.add: (-1x49x1536xf32) <- (-1x49x1536xf32, 1536xf32) + add_169 = paddle._C_ops.add(matmul_138, parameter_22) + del matmul_138, parameter_22 + + # pd_op.reshape: (-1x7x7x1536xf32) <- (-1x49x1536xf32, 4xi64) + reshape_337 = paddle._C_ops.reshape(add_169, full_int_array_43) + del add_169 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_45 = [-1, 1, 1, 7, 7, 1536] + + # pd_op.reshape: (-1x1x1x7x7x1536xf32) <- (-1x7x7x1536xf32, 6xi64) + reshape_338 = paddle._C_ops.reshape(reshape_337, full_int_array_45) + del reshape_337 + + # pd_op.transpose: (-1x1x7x1x7x1536xf32) <- (-1x1x1x7x7x1536xf32) + transpose_149 = paddle._C_ops.transpose(reshape_338, [0, 1, 3, 2, 4, 5]) + del reshape_338 + + # pd_op.reshape: (-1x7x7x1536xf32) <- (-1x1x7x1x7x1536xf32, 4xi64) + reshape_339 = paddle._C_ops.reshape(transpose_149, full_int_array_43) + del transpose_149 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_148 = [slice_153, full_4, full_29] + del slice_153 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_145 = paddle._C_ops.stack(combine_148, 0) + del combine_148 + + # pd_op.reshape: (-1x49x1536xf32) <- (-1x7x7x1536xf32, 3xi64) + reshape_340 = paddle._C_ops.reshape(reshape_339, stack_145) + del reshape_339, stack_145 + + # pd_op.add: (-1x49x1536xf32) <- (-1x-1x1536xf32, -1x49x1536xf32) + add_170 = paddle._C_ops.add(matmul_134, reshape_340) + del matmul_134, reshape_340 + + # pd_op.layer_norm: (-1x49x1536xf32, -1x49xf32, -1x49xf32) <- (-1x49x1536xf32, 1536xf32, 1536xf32) + layer_norm_147, layer_norm_148, layer_norm_149 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_170, parameter_21, parameter_20, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_20, parameter_21 + + # pd_op.matmul: (-1x49x6144xf32) <- (-1x49x1536xf32, 1536x6144xf32) + matmul_139 = paddle._C_ops.matmul(layer_norm_147, parameter_19, False, False) + del layer_norm_147, parameter_19 + + # pd_op.add: (-1x49x6144xf32) <- (-1x49x6144xf32, 6144xf32) + add_171 = paddle._C_ops.add(matmul_139, parameter_18) + del matmul_139, parameter_18 + + # pd_op.gelu: (-1x49x6144xf32) <- (-1x49x6144xf32) + gelu_22 = paddle._C_ops.gelu(add_171, False) + del add_171 + + # pd_op.matmul: (-1x49x1536xf32) <- (-1x49x6144xf32, 6144x1536xf32) + matmul_140 = paddle._C_ops.matmul(gelu_22, parameter_17, False, False) + del gelu_22, parameter_17 + + # pd_op.add: (-1x49x1536xf32) <- (-1x49x1536xf32, 1536xf32) + add_172 = paddle._C_ops.add(matmul_140, parameter_16) + del matmul_140, parameter_16 + + # pd_op.add: (-1x49x1536xf32) <- (-1x49x1536xf32, -1x49x1536xf32) + add_173 = paddle._C_ops.add(add_170, add_172) + del add_170, add_172 + + # pd_op.shape64: (3xi64) <- (-1x49x1536xf32) + shape64_91 = paddle._C_ops.shape64(add_173) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_160 = paddle._C_ops.slice( + shape64_91, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_91 + + # pd_op.layer_norm: (-1x49x1536xf32, -1x49xf32, -1x49xf32) <- (-1x49x1536xf32, 1536xf32, 1536xf32) + layer_norm_150, layer_norm_151, layer_norm_152 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_173, parameter_15, parameter_14, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_14, parameter_15 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_149 = [slice_160, full_3, full_3, full_29] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_146 = paddle._C_ops.stack(combine_149, 0) + del combine_149 + + # pd_op.reshape: (-1x7x7x1536xf32) <- (-1x49x1536xf32, 4xi64) + reshape_341 = paddle._C_ops.reshape(layer_norm_150, stack_146) + del layer_norm_150, stack_146 + + # pd_op.shape64: (4xi64) <- (-1x7x7x1536xf32) + shape64_92 = paddle._C_ops.shape64(reshape_341) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_161 = paddle._C_ops.slice( + shape64_92, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_92 + + # pd_op.roll: (-1x7x7x1536xf32) <- (-1x7x7x1536xf32, 2xi64) + roll_22 = paddle._C_ops.roll(reshape_341, full_int_array_11, [1, 2]) + del reshape_341 + + # pd_op.shape64: (4xi64) <- (-1x7x7x1536xf32) + shape64_93 = paddle._C_ops.shape64(roll_22) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_162 = paddle._C_ops.slice( + shape64_93, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_93 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_150 = [slice_162, full_47, full_3, full_47, full_3, full_29] + del full_3, slice_162 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_147 = paddle._C_ops.stack(combine_150, 0) + del combine_150 + + # pd_op.reshape: (-1x1x7x1x7x1536xf32) <- (-1x7x7x1536xf32, 6xi64) + reshape_342 = paddle._C_ops.reshape(roll_22, stack_147) + del roll_22, stack_147 + + # pd_op.transpose: (-1x1x1x7x7x1536xf32) <- (-1x1x7x1x7x1536xf32) + transpose_150 = paddle._C_ops.transpose(reshape_342, [0, 1, 3, 2, 4, 5]) + del reshape_342 + + # pd_op.reshape: (-1x7x7x1536xf32) <- (-1x1x1x7x7x1536xf32, 4xi64) + reshape_343 = paddle._C_ops.reshape(transpose_150, full_int_array_43) + del transpose_150 + + # pd_op.reshape: (-1x49x1536xf32) <- (-1x7x7x1536xf32, 3xi64) + reshape_344 = paddle._C_ops.reshape(reshape_343, full_int_array_44) + del full_int_array_44, reshape_343 + + # pd_op.full: (1x7x7x1xf32) <- () + full_49 = paddle._C_ops.full( + [1, 7, 7, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__100 = paddle._C_ops.set_value_( + full_49, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_49, full_int_array_12 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__101 = paddle._C_ops.set_value_( + set_value__100, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del full_int_array_15, set_value__100 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__102 = paddle._C_ops.set_value_( + set_value__101, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del full_int_array_17, full_int_array_18, set_value__101 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__103 = paddle._C_ops.set_value_( + set_value__102, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del full_int_array_19, set_value__102 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__104 = paddle._C_ops.set_value_( + set_value__103, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del full_int_array_13, set_value__103 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__105 = paddle._C_ops.set_value_( + set_value__104, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del full_int_array_16, full_int_array_21, set_value__104 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__106 = paddle._C_ops.set_value_( + set_value__105, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del full_int_array_22, full_int_array_23, set_value__105 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__107 = paddle._C_ops.set_value_( + set_value__106, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del full_int_array_20, full_int_array_24, set_value__106 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__11 = paddle._C_ops.set_value_( + set_value__107, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del full_int_array_11, full_int_array_25, set_value__107 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_46 = [1, 1, 7, 1, 7, 1] + + # pd_op.reshape: (1x1x7x1x7x1xf32) <- (1x7x7x1xf32, 6xi64) + reshape_345 = paddle._C_ops.reshape(set_value__11, full_int_array_46) + del full_int_array_46 + + # pd_op.transpose: (1x1x1x7x7x1xf32) <- (1x1x7x1x7x1xf32) + transpose_151 = paddle._C_ops.transpose(reshape_345, [0, 1, 3, 2, 4, 5]) + del reshape_345 + + # pd_op.reshape: (1x7x7x1xf32) <- (1x1x1x7x7x1xf32, 4xi64) + reshape_346 = paddle._C_ops.reshape(transpose_151, full_int_array_27) + del full_int_array_27, transpose_151 + + # pd_op.reshape: (1x49xf32) <- (1x7x7x1xf32, 2xi64) + reshape_347 = paddle._C_ops.reshape(reshape_346, full_int_array_28) + del full_int_array_28, reshape_346 + + # pd_op.unsqueeze: (1x1x49xf32) <- (1x49xf32, 1xi64) + unsqueeze_67 = paddle._C_ops.unsqueeze(reshape_347, full_int_array_1) + + # pd_op.unsqueeze: (1x49x1xf32) <- (1x49xf32, 1xi64) + unsqueeze_68 = paddle._C_ops.unsqueeze(reshape_347, full_int_array_5) + del reshape_347 + + # pd_op.subtract: (1x49x49xf32) <- (1x1x49xf32, 1x49x1xf32) + subtract_11 = paddle._C_ops.subtract(unsqueeze_67, unsqueeze_68) + del unsqueeze_67, unsqueeze_68 + + # pd_op.not_equal: (1x49x49xb) <- (1x49x49xf32, xf32) + not_equal_11 = paddle._C_ops.not_equal(subtract_11, full_11) + + # pd_op.full: (1x49x49xf32) <- () + full_50 = paddle._C_ops.full( + [1, 49, 49], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (1x49x49xf32) <- (1x49x49xb, 1x49x49xf32, 1x49x49xf32) + where_22 = paddle._C_ops.where(not_equal_11, full_50, subtract_11) + del full_50, not_equal_11, subtract_11 + + # pd_op.equal: (1x49x49xb) <- (1x49x49xf32, xf32) + equal_11 = paddle._C_ops.equal(where_22, full_11) + del full_11 + + # pd_op.full: (1x49x49xf32) <- () + full_51 = paddle._C_ops.full( + [1, 49, 49], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (1x49x49xf32) <- (1x49x49xb, 1x49x49xf32, 1x49x49xf32) + where_23 = paddle._C_ops.where(equal_11, full_51, where_22) + del equal_11, full_51, where_22 + + # pd_op.shape64: (3xi64) <- (-1x49x1536xf32) + shape64_94 = paddle._C_ops.shape64(reshape_344) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_163 = paddle._C_ops.slice( + shape64_94, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_94 + + # pd_op.matmul: (-1x49x4608xf32) <- (-1x49x1536xf32, 1536x4608xf32) + matmul_141 = paddle._C_ops.matmul(reshape_344, parameter_13, False, False) + del parameter_13, reshape_344 + + # pd_op.add: (-1x49x4608xf32) <- (-1x49x4608xf32, 4608xf32) + add_174 = paddle._C_ops.add(matmul_141, parameter_12) + del matmul_141, parameter_12 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_151 = [slice_163, full_4, full_5, full_48, full_7] + del full_5, full_7 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_148 = paddle._C_ops.stack(combine_151, 0) + del combine_151 + + # pd_op.reshape: (-1x49x3x48x32xf32) <- (-1x49x4608xf32, 5xi64) + reshape_348 = paddle._C_ops.reshape(add_174, stack_148) + del add_174, stack_148 + + # pd_op.transpose: (3x-1x48x49x32xf32) <- (-1x49x3x48x32xf32) + transpose_152 = paddle._C_ops.transpose(reshape_348, [2, 0, 3, 1, 4]) + del reshape_348 + + # pd_op.slice: (-1x48x49x32xf32) <- (3x-1x48x49x32xf32, 1xi64, 1xi64) + slice_164 = paddle._C_ops.slice( + transpose_152, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x48x49x32xf32) <- (3x-1x48x49x32xf32, 1xi64, 1xi64) + slice_165 = paddle._C_ops.slice( + transpose_152, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x48x49x32xf32) <- (3x-1x48x49x32xf32, 1xi64, 1xi64) + slice_166 = paddle._C_ops.slice( + transpose_152, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del full_int_array_6, transpose_152 + + # pd_op.scale: (-1x48x49x32xf32) <- (-1x48x49x32xf32, 1xf32) + scale_23 = paddle._C_ops.scale(slice_164, full_8, float("0"), True) + del full_8, slice_164 + + # pd_op.transpose: (-1x48x32x49xf32) <- (-1x48x49x32xf32) + transpose_153 = paddle._C_ops.transpose(slice_165, [0, 1, 3, 2]) + del slice_165 + + # pd_op.matmul: (-1x48x49x49xf32) <- (-1x48x49x32xf32, -1x48x32x49xf32) + matmul_142 = paddle._C_ops.matmul(scale_23, transpose_153, False, False) + del scale_23, transpose_153 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_349 = paddle._C_ops.reshape(data_47, full_int_array_7) + del data_47, full_int_array_7 + + # pd_op.index_select: (2401x48xf32) <- (169x48xf32, 2401xi64) + index_select_23 = paddle._C_ops.index_select(data_48, reshape_349, 0) + del data_48, reshape_349 + + # pd_op.reshape: (49x49x48xf32) <- (2401x48xf32, 3xi64) + reshape_350 = paddle._C_ops.reshape(index_select_23, full_int_array_8) + del full_int_array_8, index_select_23 + + # pd_op.transpose: (48x49x49xf32) <- (49x49x48xf32) + transpose_154 = paddle._C_ops.transpose(reshape_350, [2, 0, 1]) + del reshape_350 + + # pd_op.unsqueeze: (1x48x49x49xf32) <- (48x49x49xf32, 1xi64) + unsqueeze_69 = paddle._C_ops.unsqueeze(transpose_154, full_int_array_0) + del transpose_154 + + # pd_op.add: (-1x48x49x49xf32) <- (-1x48x49x49xf32, 1x48x49x49xf32) + add_175 = paddle._C_ops.add(matmul_142, unsqueeze_69) + del matmul_142, unsqueeze_69 + + # pd_op.full: (xi64) <- () + full_52 = paddle._C_ops.full( + [], float("1"), paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_11 = paddle._C_ops.floor_divide(slice_163, full_52) + del full_52 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_152 = [floor_divide_11, full_47, full_48, full_4, full_4] + del floor_divide_11, full_47 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_149 = paddle._C_ops.stack(combine_152, 0) + del combine_152 + + # pd_op.reshape: (-1x1x48x49x49xf32) <- (-1x48x49x49xf32, 5xi64) + reshape_351 = paddle._C_ops.reshape(add_175, stack_149) + del add_175, stack_149 + + # pd_op.unsqueeze: (1x1x49x49xf32) <- (1x49x49xf32, 1xi64) + unsqueeze_70 = paddle._C_ops.unsqueeze(where_23, full_int_array_1) + del full_int_array_1, where_23 + + # pd_op.unsqueeze: (1x1x1x49x49xf32) <- (1x1x49x49xf32, 1xi64) + unsqueeze_71 = paddle._C_ops.unsqueeze(unsqueeze_70, full_int_array_0) + del full_int_array_0, unsqueeze_70 + + # pd_op.add: (-1x1x48x49x49xf32) <- (-1x1x48x49x49xf32, 1x1x1x49x49xf32) + add_176 = paddle._C_ops.add(reshape_351, unsqueeze_71) + del reshape_351, unsqueeze_71 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_153 = [slice_163, full_48, full_4, full_4] + del full_48 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_150 = paddle._C_ops.stack(combine_153, 0) + del combine_153 + + # pd_op.reshape: (-1x48x49x49xf32) <- (-1x1x48x49x49xf32, 4xi64) + reshape_352 = paddle._C_ops.reshape(add_176, stack_150) + del add_176, stack_150 + + # pd_op.softmax: (-1x48x49x49xf32) <- (-1x48x49x49xf32) + softmax_23 = paddle._C_ops.softmax(reshape_352, -1) + del reshape_352 + + # pd_op.matmul: (-1x48x49x32xf32) <- (-1x48x49x49xf32, -1x48x49x32xf32) + matmul_143 = paddle._C_ops.matmul(softmax_23, slice_166, False, False) + del slice_166, softmax_23 + + # pd_op.transpose: (-1x49x48x32xf32) <- (-1x48x49x32xf32) + transpose_155 = paddle._C_ops.transpose(matmul_143, [0, 2, 1, 3]) + del matmul_143 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_154 = [slice_163, full_4, full_29] + del slice_163 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_151 = paddle._C_ops.stack(combine_154, 0) + del combine_154 + + # pd_op.reshape: (-1x49x1536xf32) <- (-1x49x48x32xf32, 3xi64) + reshape_353 = paddle._C_ops.reshape(transpose_155, stack_151) + del stack_151, transpose_155 + + # pd_op.matmul: (-1x49x1536xf32) <- (-1x49x1536xf32, 1536x1536xf32) + matmul_144 = paddle._C_ops.matmul(reshape_353, parameter_11, False, False) + del parameter_11, reshape_353 + + # pd_op.add: (-1x49x1536xf32) <- (-1x49x1536xf32, 1536xf32) + add_177 = paddle._C_ops.add(matmul_144, parameter_10) + del matmul_144, parameter_10 + + # pd_op.reshape: (-1x7x7x1536xf32) <- (-1x49x1536xf32, 4xi64) + reshape_354 = paddle._C_ops.reshape(add_177, full_int_array_43) + del add_177 + + # pd_op.reshape: (-1x1x1x7x7x1536xf32) <- (-1x7x7x1536xf32, 6xi64) + reshape_355 = paddle._C_ops.reshape(reshape_354, full_int_array_45) + del full_int_array_45, reshape_354 + + # pd_op.transpose: (-1x1x7x1x7x1536xf32) <- (-1x1x1x7x7x1536xf32) + transpose_156 = paddle._C_ops.transpose(reshape_355, [0, 1, 3, 2, 4, 5]) + del reshape_355 + + # pd_op.reshape: (-1x7x7x1536xf32) <- (-1x1x7x1x7x1536xf32, 4xi64) + reshape_356 = paddle._C_ops.reshape(transpose_156, full_int_array_43) + del full_int_array_43, transpose_156 + + # pd_op.roll: (-1x7x7x1536xf32) <- (-1x7x7x1536xf32, 2xi64) + roll_23 = paddle._C_ops.roll(reshape_356, full_int_array_29, [1, 2]) + del full_int_array_29, reshape_356 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_155 = [slice_160, full_4, full_29] + del full_29, full_4, slice_160 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_152 = paddle._C_ops.stack(combine_155, 0) + del combine_155 + + # pd_op.reshape: (-1x49x1536xf32) <- (-1x7x7x1536xf32, 3xi64) + reshape_357 = paddle._C_ops.reshape(roll_23, stack_152) + del roll_23, stack_152 + + # pd_op.add: (-1x49x1536xf32) <- (-1x49x1536xf32, -1x49x1536xf32) + add_178 = paddle._C_ops.add(add_173, reshape_357) + del add_173, reshape_357 + + # pd_op.layer_norm: (-1x49x1536xf32, -1x49xf32, -1x49xf32) <- (-1x49x1536xf32, 1536xf32, 1536xf32) + layer_norm_153, layer_norm_154, layer_norm_155 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_178, parameter_9, parameter_8, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_8, parameter_9 + + # pd_op.matmul: (-1x49x6144xf32) <- (-1x49x1536xf32, 1536x6144xf32) + matmul_145 = paddle._C_ops.matmul(layer_norm_153, parameter_7, False, False) + del layer_norm_153, parameter_7 + + # pd_op.add: (-1x49x6144xf32) <- (-1x49x6144xf32, 6144xf32) + add_179 = paddle._C_ops.add(matmul_145, parameter_6) + del matmul_145, parameter_6 + + # pd_op.gelu: (-1x49x6144xf32) <- (-1x49x6144xf32) + gelu_23 = paddle._C_ops.gelu(add_179, False) + del add_179 + + # pd_op.matmul: (-1x49x1536xf32) <- (-1x49x6144xf32, 6144x1536xf32) + matmul_146 = paddle._C_ops.matmul(gelu_23, parameter_5, False, False) + del gelu_23, parameter_5 + + # pd_op.add: (-1x49x1536xf32) <- (-1x49x1536xf32, 1536xf32) + add_180 = paddle._C_ops.add(matmul_146, parameter_4) + del matmul_146, parameter_4 + + # pd_op.add: (-1x49x1536xf32) <- (-1x49x1536xf32, -1x49x1536xf32) + add_181 = paddle._C_ops.add(add_178, add_180) + del add_178, add_180 + + # pd_op.layer_norm: (-1x49x1536xf32, -1x49xf32, -1x49xf32) <- (-1x49x1536xf32, 1536xf32, 1536xf32) + layer_norm_156, layer_norm_157, layer_norm_158 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_181, parameter_3, parameter_2, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_181, parameter_2, parameter_3 + + # pd_op.transpose: (-1x1536x49xf32) <- (-1x49x1536xf32) + transpose_157 = paddle._C_ops.transpose(layer_norm_156, [0, 2, 1]) + del layer_norm_156 + + # pd_op.unsqueeze: (-1x1536x1x49xf32) <- (-1x1536x49xf32, 1xi64) + unsqueeze_72 = paddle._C_ops.unsqueeze(transpose_157, full_int_array_5) + del transpose_157 + + # pd_op.pool2d: (-1x1536x1x1xf32) <- (-1x1536x1x49xf32, 2xi64) + pool2d_0 = paddle._C_ops.pool2d( + unsqueeze_72, + full_int_array_14, + [1, 1], + [0, 0], + False, + True, + "NCHW", + "avg", + False, + True, + "EXPLICIT", + ) + del full_int_array_14, unsqueeze_72 + + # pd_op.squeeze: (-1x1536x1xf32) <- (-1x1536x1x1xf32, 1xi64) + squeeze_0 = paddle._C_ops.squeeze(pool2d_0, full_int_array_5) + del full_int_array_5, pool2d_0 + + # pd_op.flatten: (-1x1536xf32) <- (-1x1536x1xf32) + flatten_1 = paddle._C_ops.flatten(squeeze_0, 1, 2) + del squeeze_0 + + # pd_op.matmul: (-1x102xf32) <- (-1x1536xf32, 1536x102xf32) + matmul_147 = paddle._C_ops.matmul(flatten_1, parameter_1, False, False) + del flatten_1, parameter_1 + + # pd_op.add: (-1x102xf32) <- (-1x102xf32, 102xf32) + add_0 = paddle._C_ops.add(matmul_147, parameter_0) + del matmul_147, parameter_0 + + return ( + add_0, + set_value__0, + set_value__1, + set_value__2, + set_value__3, + set_value__4, + set_value__5, + set_value__6, + set_value__7, + set_value__8, + set_value__9, + set_value__10, + set_value__11, + ) diff --git a/paddle_samples/vision-model/SwinTransformer_large_patch4_window7_224/subgraph_1/weight_meta.py b/paddle_samples/vision-model/SwinTransformer_large_patch4_window7_224/subgraph_1/weight_meta.py new file mode 100644 index 00000000..88a6a1a4 --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_large_patch4_window7_224/subgraph_1/weight_meta.py @@ -0,0 +1,2743 @@ +class Program_weight_tensor_parameter_0: + name = "parameter_0" + shape = [102] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_1: + name = "parameter_1" + shape = [1536, 102] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_2: + name = "parameter_2" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_3: + name = "parameter_3" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_4: + name = "parameter_4" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_5: + name = "parameter_5" + shape = [6144, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_6: + name = "parameter_6" + shape = [6144] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_7: + name = "parameter_7" + shape = [1536, 6144] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_8: + name = "parameter_8" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_9: + name = "parameter_9" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_10: + name = "parameter_10" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_11: + name = "parameter_11" + shape = [1536, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_12: + name = "parameter_12" + shape = [4608] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_13: + name = "parameter_13" + shape = [1536, 4608] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_14: + name = "parameter_14" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_15: + name = "parameter_15" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_16: + name = "parameter_16" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_17: + name = "parameter_17" + shape = [6144, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_18: + name = "parameter_18" + shape = [6144] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_19: + name = "parameter_19" + shape = [1536, 6144] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_20: + name = "parameter_20" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_21: + name = "parameter_21" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_22: + name = "parameter_22" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_23: + name = "parameter_23" + shape = [1536, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_24: + name = "parameter_24" + shape = [4608] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_25: + name = "parameter_25" + shape = [1536, 4608] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_26: + name = "parameter_26" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_27: + name = "parameter_27" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_28: + name = "parameter_28" + shape = [3072, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_29: + name = "parameter_29" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_30: + name = "parameter_30" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_31: + name = "parameter_31" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_32: + name = "parameter_32" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_33: + name = "parameter_33" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_34: + name = "parameter_34" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_35: + name = "parameter_35" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_36: + name = "parameter_36" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_37: + name = "parameter_37" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_38: + name = "parameter_38" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_39: + name = "parameter_39" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_40: + name = "parameter_40" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_41: + name = "parameter_41" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_42: + name = "parameter_42" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_43: + name = "parameter_43" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_44: + name = "parameter_44" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_45: + name = "parameter_45" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_46: + name = "parameter_46" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_47: + name = "parameter_47" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_48: + name = "parameter_48" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_49: + name = "parameter_49" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_50: + name = "parameter_50" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_51: + name = "parameter_51" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_52: + name = "parameter_52" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_53: + name = "parameter_53" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_54: + name = "parameter_54" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_55: + name = "parameter_55" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_56: + name = "parameter_56" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_57: + name = "parameter_57" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_58: + name = "parameter_58" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_59: + name = "parameter_59" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_60: + name = "parameter_60" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_61: + name = "parameter_61" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_62: + name = "parameter_62" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_63: + name = "parameter_63" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_64: + name = "parameter_64" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_65: + name = "parameter_65" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_66: + name = "parameter_66" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_67: + name = "parameter_67" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_68: + name = "parameter_68" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_69: + name = "parameter_69" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_70: + name = "parameter_70" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_71: + name = "parameter_71" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_72: + name = "parameter_72" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_73: + name = "parameter_73" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_74: + name = "parameter_74" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_75: + name = "parameter_75" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_76: + name = "parameter_76" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_77: + name = "parameter_77" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_78: + name = "parameter_78" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_79: + name = "parameter_79" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_80: + name = "parameter_80" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_81: + name = "parameter_81" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_82: + name = "parameter_82" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_83: + name = "parameter_83" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_84: + name = "parameter_84" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_85: + name = "parameter_85" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_86: + name = "parameter_86" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_87: + name = "parameter_87" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_88: + name = "parameter_88" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_89: + name = "parameter_89" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_90: + name = "parameter_90" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_91: + name = "parameter_91" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_92: + name = "parameter_92" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_93: + name = "parameter_93" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_94: + name = "parameter_94" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_95: + name = "parameter_95" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_96: + name = "parameter_96" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_97: + name = "parameter_97" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_98: + name = "parameter_98" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_99: + name = "parameter_99" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_100: + name = "parameter_100" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_101: + name = "parameter_101" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_102: + name = "parameter_102" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_103: + name = "parameter_103" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_104: + name = "parameter_104" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_105: + name = "parameter_105" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_106: + name = "parameter_106" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_107: + name = "parameter_107" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_108: + name = "parameter_108" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_109: + name = "parameter_109" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_110: + name = "parameter_110" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_111: + name = "parameter_111" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_112: + name = "parameter_112" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_113: + name = "parameter_113" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_114: + name = "parameter_114" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_115: + name = "parameter_115" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_116: + name = "parameter_116" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_117: + name = "parameter_117" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_118: + name = "parameter_118" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_119: + name = "parameter_119" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_120: + name = "parameter_120" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_121: + name = "parameter_121" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_122: + name = "parameter_122" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_123: + name = "parameter_123" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_124: + name = "parameter_124" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_125: + name = "parameter_125" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_126: + name = "parameter_126" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_127: + name = "parameter_127" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_128: + name = "parameter_128" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_129: + name = "parameter_129" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_130: + name = "parameter_130" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_131: + name = "parameter_131" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_132: + name = "parameter_132" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_133: + name = "parameter_133" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_134: + name = "parameter_134" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_135: + name = "parameter_135" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_136: + name = "parameter_136" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_137: + name = "parameter_137" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_138: + name = "parameter_138" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_139: + name = "parameter_139" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_140: + name = "parameter_140" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_141: + name = "parameter_141" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_142: + name = "parameter_142" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_143: + name = "parameter_143" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_144: + name = "parameter_144" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_145: + name = "parameter_145" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_146: + name = "parameter_146" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_147: + name = "parameter_147" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_148: + name = "parameter_148" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_149: + name = "parameter_149" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_150: + name = "parameter_150" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_151: + name = "parameter_151" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_152: + name = "parameter_152" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_153: + name = "parameter_153" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_154: + name = "parameter_154" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_155: + name = "parameter_155" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_156: + name = "parameter_156" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_157: + name = "parameter_157" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_158: + name = "parameter_158" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_159: + name = "parameter_159" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_160: + name = "parameter_160" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_161: + name = "parameter_161" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_162: + name = "parameter_162" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_163: + name = "parameter_163" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_164: + name = "parameter_164" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_165: + name = "parameter_165" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_166: + name = "parameter_166" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_167: + name = "parameter_167" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_168: + name = "parameter_168" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_169: + name = "parameter_169" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_170: + name = "parameter_170" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_171: + name = "parameter_171" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_172: + name = "parameter_172" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_173: + name = "parameter_173" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_174: + name = "parameter_174" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_175: + name = "parameter_175" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_176: + name = "parameter_176" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_177: + name = "parameter_177" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_178: + name = "parameter_178" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_179: + name = "parameter_179" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_180: + name = "parameter_180" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_181: + name = "parameter_181" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_182: + name = "parameter_182" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_183: + name = "parameter_183" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_184: + name = "parameter_184" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_185: + name = "parameter_185" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_186: + name = "parameter_186" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_187: + name = "parameter_187" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_188: + name = "parameter_188" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_189: + name = "parameter_189" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_190: + name = "parameter_190" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_191: + name = "parameter_191" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_192: + name = "parameter_192" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_193: + name = "parameter_193" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_194: + name = "parameter_194" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_195: + name = "parameter_195" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_196: + name = "parameter_196" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_197: + name = "parameter_197" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_198: + name = "parameter_198" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_199: + name = "parameter_199" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_200: + name = "parameter_200" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_201: + name = "parameter_201" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_202: + name = "parameter_202" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_203: + name = "parameter_203" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_204: + name = "parameter_204" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_205: + name = "parameter_205" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_206: + name = "parameter_206" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_207: + name = "parameter_207" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_208: + name = "parameter_208" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_209: + name = "parameter_209" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_210: + name = "parameter_210" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_211: + name = "parameter_211" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_212: + name = "parameter_212" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_213: + name = "parameter_213" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_214: + name = "parameter_214" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_215: + name = "parameter_215" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_216: + name = "parameter_216" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_217: + name = "parameter_217" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_218: + name = "parameter_218" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_219: + name = "parameter_219" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_220: + name = "parameter_220" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_221: + name = "parameter_221" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_222: + name = "parameter_222" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_223: + name = "parameter_223" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_224: + name = "parameter_224" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_225: + name = "parameter_225" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_226: + name = "parameter_226" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_227: + name = "parameter_227" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_228: + name = "parameter_228" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_229: + name = "parameter_229" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_230: + name = "parameter_230" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_231: + name = "parameter_231" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_232: + name = "parameter_232" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_233: + name = "parameter_233" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_234: + name = "parameter_234" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_235: + name = "parameter_235" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_236: + name = "parameter_236" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_237: + name = "parameter_237" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_238: + name = "parameter_238" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_239: + name = "parameter_239" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_240: + name = "parameter_240" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_241: + name = "parameter_241" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_242: + name = "parameter_242" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_243: + name = "parameter_243" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_244: + name = "parameter_244" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_245: + name = "parameter_245" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_246: + name = "parameter_246" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_247: + name = "parameter_247" + shape = [1536, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_248: + name = "parameter_248" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_249: + name = "parameter_249" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_250: + name = "parameter_250" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_251: + name = "parameter_251" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_252: + name = "parameter_252" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_253: + name = "parameter_253" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_254: + name = "parameter_254" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_255: + name = "parameter_255" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_256: + name = "parameter_256" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_257: + name = "parameter_257" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_258: + name = "parameter_258" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_259: + name = "parameter_259" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_260: + name = "parameter_260" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_261: + name = "parameter_261" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_262: + name = "parameter_262" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_263: + name = "parameter_263" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_264: + name = "parameter_264" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_265: + name = "parameter_265" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_266: + name = "parameter_266" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_267: + name = "parameter_267" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_268: + name = "parameter_268" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_269: + name = "parameter_269" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_270: + name = "parameter_270" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_271: + name = "parameter_271" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_272: + name = "parameter_272" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_273: + name = "parameter_273" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_274: + name = "parameter_274" + shape = [768, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_275: + name = "parameter_275" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_276: + name = "parameter_276" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_277: + name = "parameter_277" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_278: + name = "parameter_278" + shape = [768, 192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_279: + name = "parameter_279" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_280: + name = "parameter_280" + shape = [192, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_281: + name = "parameter_281" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_282: + name = "parameter_282" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_283: + name = "parameter_283" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_284: + name = "parameter_284" + shape = [192, 192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_285: + name = "parameter_285" + shape = [576] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_286: + name = "parameter_286" + shape = [192, 576] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_287: + name = "parameter_287" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_288: + name = "parameter_288" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_289: + name = "parameter_289" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_290: + name = "parameter_290" + shape = [768, 192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_291: + name = "parameter_291" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_292: + name = "parameter_292" + shape = [192, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_293: + name = "parameter_293" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_294: + name = "parameter_294" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_295: + name = "parameter_295" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_296: + name = "parameter_296" + shape = [192, 192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_297: + name = "parameter_297" + shape = [576] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_298: + name = "parameter_298" + shape = [192, 576] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_299: + name = "parameter_299" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_300: + name = "parameter_300" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_301: + name = "parameter_301" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_302: + name = "parameter_302" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_303: + name = "parameter_303" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_304: + name = "parameter_304" + shape = [192, 3, 4, 4] + dtype = "float32" + low = 0 + high = 0.5 + data = None diff --git a/paddle_samples/vision-model/SwinTransformer_large_patch4_window7_224/subgraph_2/graph_hash.txt b/paddle_samples/vision-model/SwinTransformer_large_patch4_window7_224/subgraph_2/graph_hash.txt new file mode 100644 index 00000000..678a18a1 --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_large_patch4_window7_224/subgraph_2/graph_hash.txt @@ -0,0 +1 @@ +b30ca9883649d991bc8c4ba04da935a9b4b73a36613bcc8529ef9a0a2fb8b1d3 \ No newline at end of file diff --git a/paddle_samples/vision-model/SwinTransformer_large_patch4_window7_224/subgraph_2/graph_net.json b/paddle_samples/vision-model/SwinTransformer_large_patch4_window7_224/subgraph_2/graph_net.json new file mode 100644 index 00000000..368ac6fc --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_large_patch4_window7_224/subgraph_2/graph_net.json @@ -0,0 +1,5 @@ +{ + "framework": "paddle", + "num_devices_required": 1, + "num_nodes_required": 1 +} \ No newline at end of file diff --git a/paddle_samples/vision-model/SwinTransformer_large_patch4_window7_224/subgraph_2/input_meta.py b/paddle_samples/vision-model/SwinTransformer_large_patch4_window7_224/subgraph_2/input_meta.py new file mode 100644 index 00000000..24804998 --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_large_patch4_window7_224/subgraph_2/input_meta.py @@ -0,0 +1,439 @@ +class Program_weight_tensor_data_0: + name = "data_0" + shape = [4, 3, 224, 224] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_1: + name = "data_1" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_2: + name = "data_2" + shape = [169, 6] + dtype = "float32" + low = -10.331 + high = 6.98731 + data = None + + +class Program_weight_tensor_data_3: + name = "data_3" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_4: + name = "data_4" + shape = [169, 6] + dtype = "float32" + low = -8.58566 + high = 5.9563 + data = None + + +class Program_weight_tensor_data_5: + name = "data_5" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_6: + name = "data_6" + shape = [169, 12] + dtype = "float32" + low = -9.91299 + high = 6.31486 + data = None + + +class Program_weight_tensor_data_7: + name = "data_7" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_8: + name = "data_8" + shape = [169, 12] + dtype = "float32" + low = -8.17235 + high = 5.93824 + data = None + + +class Program_weight_tensor_data_9: + name = "data_9" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_10: + name = "data_10" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_11: + name = "data_11" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_12: + name = "data_12" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_13: + name = "data_13" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_14: + name = "data_14" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_15: + name = "data_15" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_16: + name = "data_16" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_17: + name = "data_17" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_18: + name = "data_18" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_19: + name = "data_19" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_20: + name = "data_20" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_21: + name = "data_21" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_22: + name = "data_22" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_23: + name = "data_23" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_24: + name = "data_24" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_25: + name = "data_25" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_26: + name = "data_26" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_27: + name = "data_27" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_28: + name = "data_28" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_29: + name = "data_29" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_30: + name = "data_30" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_31: + name = "data_31" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_32: + name = "data_32" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_33: + name = "data_33" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_34: + name = "data_34" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_35: + name = "data_35" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_36: + name = "data_36" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_37: + name = "data_37" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_38: + name = "data_38" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_39: + name = "data_39" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_40: + name = "data_40" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_41: + name = "data_41" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_42: + name = "data_42" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_43: + name = "data_43" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_44: + name = "data_44" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_45: + name = "data_45" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_46: + name = "data_46" + shape = [169, 48] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_47: + name = "data_47" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_48: + name = "data_48" + shape = [169, 48] + dtype = "float32" + low = 0 + high = 0.5 + data = None diff --git a/paddle_samples/vision-model/SwinTransformer_large_patch4_window7_224/subgraph_2/model.py b/paddle_samples/vision-model/SwinTransformer_large_patch4_window7_224/subgraph_2/model.py new file mode 100644 index 00000000..218161b5 --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_large_patch4_window7_224/subgraph_2/model.py @@ -0,0 +1,13792 @@ +import paddle + + +class GraphModule(paddle.nn.Layer): + def __init__(self): + super().__init__() + + def forward( + self, + parameter_0, + parameter_1, + parameter_2, + parameter_3, + parameter_4, + parameter_5, + parameter_6, + parameter_7, + parameter_8, + parameter_9, + parameter_10, + parameter_11, + parameter_12, + parameter_13, + parameter_14, + parameter_15, + parameter_16, + parameter_17, + parameter_18, + parameter_19, + parameter_20, + parameter_21, + parameter_22, + parameter_23, + parameter_24, + parameter_25, + parameter_26, + parameter_27, + parameter_28, + parameter_29, + parameter_30, + parameter_31, + parameter_32, + parameter_33, + parameter_34, + parameter_35, + parameter_36, + parameter_37, + parameter_38, + parameter_39, + parameter_40, + parameter_41, + parameter_42, + parameter_43, + parameter_44, + parameter_45, + parameter_46, + parameter_47, + parameter_48, + parameter_49, + parameter_50, + parameter_51, + parameter_52, + parameter_53, + parameter_54, + parameter_55, + parameter_56, + parameter_57, + parameter_58, + parameter_59, + parameter_60, + parameter_61, + parameter_62, + parameter_63, + parameter_64, + parameter_65, + parameter_66, + parameter_67, + parameter_68, + parameter_69, + parameter_70, + parameter_71, + parameter_72, + parameter_73, + parameter_74, + parameter_75, + parameter_76, + parameter_77, + parameter_78, + parameter_79, + parameter_80, + parameter_81, + parameter_82, + parameter_83, + parameter_84, + parameter_85, + parameter_86, + parameter_87, + parameter_88, + parameter_89, + parameter_90, + parameter_91, + parameter_92, + parameter_93, + parameter_94, + parameter_95, + parameter_96, + parameter_97, + parameter_98, + parameter_99, + parameter_100, + parameter_101, + parameter_102, + parameter_103, + parameter_104, + parameter_105, + parameter_106, + parameter_107, + parameter_108, + parameter_109, + parameter_110, + parameter_111, + parameter_112, + parameter_113, + parameter_114, + parameter_115, + parameter_116, + parameter_117, + parameter_118, + parameter_119, + parameter_120, + parameter_121, + parameter_122, + parameter_123, + parameter_124, + parameter_125, + parameter_126, + parameter_127, + parameter_128, + parameter_129, + parameter_130, + parameter_131, + parameter_132, + parameter_133, + parameter_134, + parameter_135, + parameter_136, + parameter_137, + parameter_138, + parameter_139, + parameter_140, + parameter_141, + parameter_142, + parameter_143, + parameter_144, + parameter_145, + parameter_146, + parameter_147, + parameter_148, + parameter_149, + parameter_150, + parameter_151, + parameter_152, + parameter_153, + parameter_154, + parameter_155, + parameter_156, + parameter_157, + parameter_158, + parameter_159, + parameter_160, + parameter_161, + parameter_162, + parameter_163, + parameter_164, + parameter_165, + parameter_166, + parameter_167, + parameter_168, + parameter_169, + parameter_170, + parameter_171, + parameter_172, + parameter_173, + parameter_174, + parameter_175, + parameter_176, + parameter_177, + parameter_178, + parameter_179, + parameter_180, + parameter_181, + parameter_182, + parameter_183, + parameter_184, + parameter_185, + parameter_186, + parameter_187, + parameter_188, + parameter_189, + parameter_190, + parameter_191, + parameter_192, + parameter_193, + parameter_194, + parameter_195, + parameter_196, + parameter_197, + parameter_198, + parameter_199, + parameter_200, + parameter_201, + parameter_202, + parameter_203, + parameter_204, + parameter_205, + parameter_206, + parameter_207, + parameter_208, + parameter_209, + parameter_210, + parameter_211, + parameter_212, + parameter_213, + parameter_214, + parameter_215, + parameter_216, + parameter_217, + parameter_218, + parameter_219, + parameter_220, + parameter_221, + parameter_222, + parameter_223, + parameter_224, + parameter_225, + parameter_226, + parameter_227, + parameter_228, + parameter_229, + parameter_230, + parameter_231, + parameter_232, + parameter_233, + parameter_234, + parameter_235, + parameter_236, + parameter_237, + parameter_238, + parameter_239, + parameter_240, + parameter_241, + parameter_242, + parameter_243, + parameter_244, + parameter_245, + parameter_246, + parameter_247, + parameter_248, + parameter_249, + parameter_250, + parameter_251, + parameter_252, + parameter_253, + parameter_254, + parameter_255, + parameter_256, + parameter_257, + parameter_258, + parameter_259, + parameter_260, + parameter_261, + parameter_262, + parameter_263, + parameter_264, + parameter_265, + parameter_266, + parameter_267, + parameter_268, + parameter_269, + parameter_270, + parameter_271, + parameter_272, + parameter_273, + parameter_274, + parameter_275, + parameter_276, + parameter_277, + parameter_278, + parameter_279, + parameter_280, + parameter_281, + parameter_282, + parameter_283, + parameter_284, + parameter_285, + parameter_286, + parameter_287, + parameter_288, + parameter_289, + parameter_290, + parameter_291, + parameter_292, + parameter_293, + parameter_294, + parameter_295, + parameter_296, + parameter_297, + parameter_298, + parameter_299, + parameter_300, + parameter_301, + parameter_302, + parameter_303, + parameter_304, + data_0, + data_1, + data_2, + data_3, + data_4, + data_5, + data_6, + data_7, + data_8, + data_9, + data_10, + data_11, + data_12, + data_13, + data_14, + data_15, + data_16, + data_17, + data_18, + data_19, + data_20, + data_21, + data_22, + data_23, + data_24, + data_25, + data_26, + data_27, + data_28, + data_29, + data_30, + data_31, + data_32, + data_33, + data_34, + data_35, + data_36, + data_37, + data_38, + data_39, + data_40, + data_41, + data_42, + data_43, + data_44, + data_45, + data_46, + data_47, + data_48, + ): + # pd_op.shape64: (4xi64) <- (-1x3x224x224xf32) + shape64_0 = paddle._C_ops.shape64(data_0) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_7 = [0] + + # pd_op.assign: (1xi64) <- (1xi64) + assign_266 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_259 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_256 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_249 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_233 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_226 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_223 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_216 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_213 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_206 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_203 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_196 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_193 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_186 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_183 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_176 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_173 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_166 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_163 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_156 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_153 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_146 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_143 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_136 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_133 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_126 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_123 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_116 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_113 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_106 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_103 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_96 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_93 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_86 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_83 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_76 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_73 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_66 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_63 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_56 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_40 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_33 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_30 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_23 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_12 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_5 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_4 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_0 = full_int_array_7 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_8 = [1] + + # pd_op.assign: (1xi64) <- (1xi64) + assign_261 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_260 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_251 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_250 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_228 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_227 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_218 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_217 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_208 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_207 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_198 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_197 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_188 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_187 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_178 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_177 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_168 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_167 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_158 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_157 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_148 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_147 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_138 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_137 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_128 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_127 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_118 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_117 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_108 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_107 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_98 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_97 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_88 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_87 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_78 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_77 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_68 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_67 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_58 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_57 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_35 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_34 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_25 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_24 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_7 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_6 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_2 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_1 = full_int_array_8 + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_24 = paddle._C_ops.slice( + shape64_0, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_0 + + # pd_op.conv2d: (-1x192x56x56xf32) <- (-1x3x224x224xf32, 192x3x4x4xf32) + conv2d_0 = paddle._C_ops.conv2d( + data_0, parameter_304, [4, 4], [0, 0], "EXPLICIT", [1, 1], 1, "NCHW" + ) + del data_0, parameter_304 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_9 = [1, -1, 1, 1] + + # pd_op.reshape: (1x192x1x1xf32) <- (192xf32, 4xi64) + reshape_0 = paddle._C_ops.reshape(parameter_303, full_int_array_9) + del full_int_array_9, parameter_303 + + # pd_op.add: (-1x192x56x56xf32) <- (-1x192x56x56xf32, 1x192x1x1xf32) + add_0 = paddle._C_ops.add(conv2d_0, reshape_0) + + # pd_op.shape64: (4xi64) <- (-1x192x56x56xf32) + shape64_1 = paddle._C_ops.shape64(add_0) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_25 = paddle._C_ops.slice( + shape64_1, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_1 + + # pd_op.flatten: (-1x192x3136xf32) <- (-1x192x56x56xf32) + flatten_1 = paddle._C_ops.flatten(add_0, 2, 3) + + # pd_op.transpose: (-1x3136x192xf32) <- (-1x192x3136xf32) + transpose_0 = paddle._C_ops.transpose(flatten_1, [0, 2, 1]) + del flatten_1 + + # pd_op.layer_norm: (-1x3136x192xf32, -1x3136xf32, -1x3136xf32) <- (-1x3136x192xf32, 192xf32, 192xf32) + layer_norm_0, layer_norm_1, layer_norm_2 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + transpose_0, parameter_302, parameter_301, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_301, parameter_302 + + # pd_op.shape64: (3xi64) <- (-1x3136x192xf32) + shape64_2 = paddle._C_ops.shape64(layer_norm_0) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_26 = paddle._C_ops.slice( + shape64_2, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_2 + + # pd_op.layer_norm: (-1x3136x192xf32, -1x3136xf32, -1x3136xf32) <- (-1x3136x192xf32, 192xf32, 192xf32) + layer_norm_3, layer_norm_4, layer_norm_5 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + layer_norm_0, parameter_300, parameter_299, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_299, parameter_300 + + # pd_op.full: (xi64) <- () + full_25 = paddle._C_ops.full( + [], float("56"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_26 = paddle._C_ops.full( + [], float("192"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_0 = [slice_26, full_25, full_25, full_26] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_0 = paddle._C_ops.stack(combine_0, 0) + del combine_0 + + # pd_op.reshape: (-1x56x56x192xf32) <- (-1x3136x192xf32, 4xi64) + reshape_1 = paddle._C_ops.reshape(layer_norm_3, stack_0) + del stack_0 + + # pd_op.shape64: (4xi64) <- (-1x56x56x192xf32) + shape64_3 = paddle._C_ops.shape64(reshape_1) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_27 = paddle._C_ops.slice( + shape64_3, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_3 + + # pd_op.full: (xi64) <- () + full_27 = paddle._C_ops.full( + [], float("8"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_28 = paddle._C_ops.full( + [], float("7"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_1 = [slice_27, full_27, full_28, full_27, full_28, full_26] + del slice_27 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_1 = paddle._C_ops.stack(combine_1, 0) + del combine_1 + + # pd_op.reshape: (-1x8x7x8x7x192xf32) <- (-1x56x56x192xf32, 6xi64) + reshape_211 = paddle._C_ops.reshape(reshape_1, stack_1) + del stack_1 + + # pd_op.transpose: (-1x8x8x7x7x192xf32) <- (-1x8x7x8x7x192xf32) + transpose_1 = paddle._C_ops.transpose(reshape_211, [0, 1, 3, 2, 4, 5]) + del reshape_211 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_10 = [-1, 7, 7, 192] + + # pd_op.reshape: (-1x7x7x192xf32) <- (-1x8x8x7x7x192xf32, 4xi64) + reshape_2 = paddle._C_ops.reshape(transpose_1, full_int_array_10) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_11 = [-1, 49, 192] + + # pd_op.reshape: (-1x49x192xf32) <- (-1x7x7x192xf32, 3xi64) + reshape_3 = paddle._C_ops.reshape(reshape_2, full_int_array_11) + + # pd_op.shape64: (3xi64) <- (-1x49x192xf32) + shape64_4 = paddle._C_ops.shape64(reshape_3) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_28 = paddle._C_ops.slice( + shape64_4, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_4 + + # pd_op.matmul: (-1x49x576xf32) <- (-1x49x192xf32, 192x576xf32) + matmul_0 = paddle._C_ops.matmul(reshape_3, parameter_298, False, False) + del parameter_298 + + # pd_op.add: (-1x49x576xf32) <- (-1x49x576xf32, 576xf32) + add_1 = paddle._C_ops.add(matmul_0, parameter_297) + del parameter_297 + + # pd_op.full: (xi64) <- () + full_29 = paddle._C_ops.full( + [], float("49"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_30 = paddle._C_ops.full( + [], float("3"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_31 = paddle._C_ops.full( + [], float("6"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_32 = paddle._C_ops.full( + [], float("32"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_2 = [slice_28, full_29, full_30, full_31, full_32] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_2 = paddle._C_ops.stack(combine_2, 0) + del combine_2 + + # pd_op.reshape: (-1x49x3x6x32xf32) <- (-1x49x576xf32, 5xi64) + reshape_212 = paddle._C_ops.reshape(add_1, stack_2) + del stack_2 + + # pd_op.transpose: (3x-1x6x49x32xf32) <- (-1x49x3x6x32xf32) + transpose_2 = paddle._C_ops.transpose(reshape_212, [2, 0, 3, 1, 4]) + del reshape_212 + + # pd_op.slice: (-1x6x49x32xf32) <- (3x-1x6x49x32xf32, 1xi64, 1xi64) + slice_29 = paddle._C_ops.slice( + transpose_2, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_0 = [2] + + # pd_op.assign: (1xi64) <- (1xi64) + assign_271 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_269 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_263 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_262 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_253 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_252 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_230 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_229 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_220 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_219 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_210 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_209 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_200 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_199 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_190 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_189 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_180 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_179 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_170 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_169 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_160 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_159 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_150 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_149 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_140 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_139 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_130 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_129 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_120 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_119 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_110 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_109 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_100 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_99 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_90 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_89 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_80 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_79 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_70 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_69 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_60 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_59 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_37 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_36 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_27 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_26 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_9 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_8 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_3 = full_int_array_0 + + # pd_op.slice: (-1x6x49x32xf32) <- (3x-1x6x49x32xf32, 1xi64, 1xi64) + slice_30 = paddle._C_ops.slice( + transpose_2, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_1 = [3] + + # pd_op.assign: (1xi64) <- (1xi64) + assign_264 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_254 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_231 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_221 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_211 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_201 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_191 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_181 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_171 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_161 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_151 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_141 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_131 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_121 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_111 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_101 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_91 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_81 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_71 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_61 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_38 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_28 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_10 = full_int_array_1 + + # pd_op.slice: (-1x6x49x32xf32) <- (3x-1x6x49x32xf32, 1xi64, 1xi64) + slice_0 = paddle._C_ops.slice( + transpose_2, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.full: (1xf32) <- () + full_0 = paddle._C_ops.full( + [1], float("0.176777"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.assign: (1xf32) <- (1xf32) + assign_265 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_255 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_232 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_222 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_212 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_202 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_192 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_182 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_172 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_162 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_152 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_142 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_132 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_122 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_112 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_102 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_92 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_82 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_72 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_62 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_39 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_29 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_11 = full_0 + + # pd_op.scale: (-1x6x49x32xf32) <- (-1x6x49x32xf32, 1xf32) + scale_0 = paddle._C_ops.scale(slice_29, full_0, float("0"), True) + del slice_29 + + # pd_op.transpose: (-1x6x32x49xf32) <- (-1x6x49x32xf32) + transpose_3 = paddle._C_ops.transpose(slice_30, [0, 1, 3, 2]) + del slice_30 + + # pd_op.matmul: (-1x6x49x49xf32) <- (-1x6x49x32xf32, -1x6x32x49xf32) + matmul_1 = paddle._C_ops.matmul(scale_0, transpose_3, False, False) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_12 = [-1] + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_4 = paddle._C_ops.reshape(data_1, full_int_array_12) + del data_1 + + # pd_op.index_select: (2401x6xf32) <- (169x6xf32, 2401xi64) + index_select_0 = paddle._C_ops.index_select(data_2, reshape_4, 0) + del data_2 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_13 = [49, 49, -1] + + # pd_op.reshape: (49x49x6xf32) <- (2401x6xf32, 3xi64) + reshape_213 = paddle._C_ops.reshape(index_select_0, full_int_array_13) + + # pd_op.transpose: (6x49x49xf32) <- (49x49x6xf32) + transpose_4 = paddle._C_ops.transpose(reshape_213, [2, 0, 1]) + del reshape_213 + + # pd_op.unsqueeze: (1x6x49x49xf32) <- (6x49x49xf32, 1xi64) + unsqueeze_0 = paddle._C_ops.unsqueeze(transpose_4, full_int_array_7) + + # pd_op.add: (-1x6x49x49xf32) <- (-1x6x49x49xf32, 1x6x49x49xf32) + add_170 = paddle._C_ops.add(matmul_1, unsqueeze_0) + + # pd_op.softmax: (-1x6x49x49xf32) <- (-1x6x49x49xf32) + softmax_0 = paddle._C_ops.softmax(add_170, -1) + del add_170 + + # pd_op.matmul: (-1x6x49x32xf32) <- (-1x6x49x49xf32, -1x6x49x32xf32) + matmul_124 = paddle._C_ops.matmul(softmax_0, slice_0, False, False) + + # pd_op.transpose: (-1x49x6x32xf32) <- (-1x6x49x32xf32) + transpose_5 = paddle._C_ops.transpose(matmul_124, [0, 2, 1, 3]) + del matmul_124 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_3 = [slice_28, full_29, full_26] + del slice_28 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_3 = paddle._C_ops.stack(combine_3, 0) + del combine_3 + + # pd_op.reshape: (-1x49x192xf32) <- (-1x49x6x32xf32, 3xi64) + reshape_5 = paddle._C_ops.reshape(transpose_5, stack_3) + del stack_3 + + # pd_op.matmul: (-1x49x192xf32) <- (-1x49x192xf32, 192x192xf32) + matmul_2 = paddle._C_ops.matmul(reshape_5, parameter_296, False, False) + del parameter_296 + + # pd_op.add: (-1x49x192xf32) <- (-1x49x192xf32, 192xf32) + add_2 = paddle._C_ops.add(matmul_2, parameter_295) + del parameter_295 + + # pd_op.reshape: (-1x7x7x192xf32) <- (-1x49x192xf32, 4xi64) + reshape_6 = paddle._C_ops.reshape(add_2, full_int_array_10) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_14 = [-1, 8, 8, 7, 7, 192] + + # pd_op.reshape: (-1x8x8x7x7x192xf32) <- (-1x7x7x192xf32, 6xi64) + reshape_214 = paddle._C_ops.reshape(reshape_6, full_int_array_14) + + # pd_op.transpose: (-1x8x7x8x7x192xf32) <- (-1x8x8x7x7x192xf32) + transpose_6 = paddle._C_ops.transpose(reshape_214, [0, 1, 3, 2, 4, 5]) + del reshape_214 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_15 = [-1, 56, 56, 192] + + # pd_op.reshape: (-1x56x56x192xf32) <- (-1x8x7x8x7x192xf32, 4xi64) + reshape_7 = paddle._C_ops.reshape(transpose_6, full_int_array_15) + + # pd_op.full: (xi64) <- () + full_33 = paddle._C_ops.full( + [], float("3136"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_4 = [slice_26, full_33, full_26] + del slice_26 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_4 = paddle._C_ops.stack(combine_4, 0) + del combine_4 + + # pd_op.reshape: (-1x3136x192xf32) <- (-1x56x56x192xf32, 3xi64) + reshape_8 = paddle._C_ops.reshape(reshape_7, stack_4) + del stack_4 + + # pd_op.add: (-1x3136x192xf32) <- (-1x3136x192xf32, -1x3136x192xf32) + add_3 = paddle._C_ops.add(layer_norm_0, reshape_8) + + # pd_op.layer_norm: (-1x3136x192xf32, -1x3136xf32, -1x3136xf32) <- (-1x3136x192xf32, 192xf32, 192xf32) + layer_norm_6, layer_norm_7, layer_norm_8 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_3, parameter_294, parameter_293, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_293, parameter_294 + + # pd_op.matmul: (-1x3136x768xf32) <- (-1x3136x192xf32, 192x768xf32) + matmul_3 = paddle._C_ops.matmul(layer_norm_6, parameter_292, False, False) + del parameter_292 + + # pd_op.add: (-1x3136x768xf32) <- (-1x3136x768xf32, 768xf32) + add_4 = paddle._C_ops.add(matmul_3, parameter_291) + del parameter_291 + + # pd_op.gelu: (-1x3136x768xf32) <- (-1x3136x768xf32) + gelu_0 = paddle._C_ops.gelu(add_4, False) + + # pd_op.matmul: (-1x3136x192xf32) <- (-1x3136x768xf32, 768x192xf32) + matmul_4 = paddle._C_ops.matmul(gelu_0, parameter_290, False, False) + del parameter_290 + + # pd_op.add: (-1x3136x192xf32) <- (-1x3136x192xf32, 192xf32) + add_5 = paddle._C_ops.add(matmul_4, parameter_289) + del parameter_289 + + # pd_op.add: (-1x3136x192xf32) <- (-1x3136x192xf32, -1x3136x192xf32) + add_6 = paddle._C_ops.add(add_3, add_5) + + # pd_op.shape64: (3xi64) <- (-1x3136x192xf32) + shape64_5 = paddle._C_ops.shape64(add_6) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_31 = paddle._C_ops.slice( + shape64_5, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_5 + + # pd_op.layer_norm: (-1x3136x192xf32, -1x3136xf32, -1x3136xf32) <- (-1x3136x192xf32, 192xf32, 192xf32) + layer_norm_9, layer_norm_10, layer_norm_11 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_6, parameter_288, parameter_287, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_287, parameter_288 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_5 = [slice_31, full_25, full_25, full_26] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_5 = paddle._C_ops.stack(combine_5, 0) + del combine_5 + + # pd_op.reshape: (-1x56x56x192xf32) <- (-1x3136x192xf32, 4xi64) + reshape_9 = paddle._C_ops.reshape(layer_norm_9, stack_5) + del stack_5 + + # pd_op.shape64: (4xi64) <- (-1x56x56x192xf32) + shape64_6 = paddle._C_ops.shape64(reshape_9) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_32 = paddle._C_ops.slice( + shape64_6, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_6 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_2 = [-3, -3] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_258 = full_int_array_2 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_225 = full_int_array_2 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_205 = full_int_array_2 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_185 = full_int_array_2 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_165 = full_int_array_2 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_145 = full_int_array_2 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_125 = full_int_array_2 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_105 = full_int_array_2 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_85 = full_int_array_2 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_65 = full_int_array_2 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_32 = full_int_array_2 + + # pd_op.roll: (-1x56x56x192xf32) <- (-1x56x56x192xf32, 2xi64) + roll_0 = paddle._C_ops.roll(reshape_9, full_int_array_2, [1, 2]) + + # pd_op.shape64: (4xi64) <- (-1x56x56x192xf32) + shape64_7 = paddle._C_ops.shape64(roll_0) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_33 = paddle._C_ops.slice( + shape64_7, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_7 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_6 = [slice_33, full_27, full_28, full_27, full_28, full_26] + del full_27, slice_33 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_6 = paddle._C_ops.stack(combine_6, 0) + del combine_6 + + # pd_op.reshape: (-1x8x7x8x7x192xf32) <- (-1x56x56x192xf32, 6xi64) + reshape_215 = paddle._C_ops.reshape(roll_0, stack_6) + del stack_6 + + # pd_op.transpose: (-1x8x8x7x7x192xf32) <- (-1x8x7x8x7x192xf32) + transpose_7 = paddle._C_ops.transpose(reshape_215, [0, 1, 3, 2, 4, 5]) + del reshape_215 + + # pd_op.reshape: (-1x7x7x192xf32) <- (-1x8x8x7x7x192xf32, 4xi64) + reshape_10 = paddle._C_ops.reshape(transpose_7, full_int_array_10) + + # pd_op.reshape: (-1x49x192xf32) <- (-1x7x7x192xf32, 3xi64) + reshape_11 = paddle._C_ops.reshape(reshape_10, full_int_array_11) + del full_int_array_11 + + # pd_op.full: (1x56x56x1xf32) <- () + full_34 = paddle._C_ops.full( + [1, 56, 56, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_16 = [0, 0] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_236 = full_int_array_16 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_43 = full_int_array_16 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_14 = full_int_array_16 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_17 = [-7, -7] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_18 = [1, 1] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_270 = full_int_array_18 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_245 = full_int_array_18 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_52 = full_int_array_18 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_20 = full_int_array_18 + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__12 = paddle._C_ops.set_value_( + full_34, + full_int_array_16, + full_int_array_17, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_34 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_19 = [0, -7] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_20 = [-7, -3] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__13 = paddle._C_ops.set_value_( + set_value__12, + full_int_array_19, + full_int_array_20, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__12 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_21 = [0, -3] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_22 = [-7, 2147483647] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__14 = paddle._C_ops.set_value_( + set_value__13, + full_int_array_21, + full_int_array_22, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__13 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_23 = [-7, 0] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_24 = [-3, -7] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__15 = paddle._C_ops.set_value_( + set_value__14, + full_int_array_23, + full_int_array_24, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__14 + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__16 = paddle._C_ops.set_value_( + set_value__15, + full_int_array_17, + full_int_array_2, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__15 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_25 = [-3, 2147483647] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__17 = paddle._C_ops.set_value_( + set_value__16, + full_int_array_20, + full_int_array_25, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__16 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_26 = [-3, 0] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_27 = [2147483647, -7] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__18 = paddle._C_ops.set_value_( + set_value__17, + full_int_array_26, + full_int_array_27, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__17 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_28 = [2147483647, -3] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__19 = paddle._C_ops.set_value_( + set_value__18, + full_int_array_24, + full_int_array_28, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__18 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_29 = [2147483647, 2147483647] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_246 = full_int_array_29 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_243 = full_int_array_29 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_240 = full_int_array_29 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_237 = full_int_array_29 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_53 = full_int_array_29 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_50 = full_int_array_29 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_47 = full_int_array_29 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_44 = full_int_array_29 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_21 = full_int_array_29 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_18 = full_int_array_29 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_16 = full_int_array_29 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_15 = full_int_array_29 + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__0 = paddle._C_ops.set_value_( + set_value__19, + full_int_array_2, + full_int_array_29, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__19 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_30 = [1, 8, 7, 8, 7, 1] + + # pd_op.reshape: (1x8x7x8x7x1xf32) <- (1x56x56x1xf32, 6xi64) + reshape_216 = paddle._C_ops.reshape(set_value__0, full_int_array_30) + del full_int_array_30 + + # pd_op.transpose: (1x8x8x7x7x1xf32) <- (1x8x7x8x7x1xf32) + transpose_146 = paddle._C_ops.transpose(reshape_216, [0, 1, 3, 2, 4, 5]) + del reshape_216 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_31 = [-1, 7, 7, 1] + + # pd_op.reshape: (64x7x7x1xf32) <- (1x8x8x7x7x1xf32, 4xi64) + reshape_217 = paddle._C_ops.reshape(transpose_146, full_int_array_31) + del transpose_146 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_32 = [-1, 49] + + # pd_op.reshape: (64x49xf32) <- (64x7x7x1xf32, 2xi64) + reshape_218 = paddle._C_ops.reshape(reshape_217, full_int_array_32) + del reshape_217 + + # pd_op.unsqueeze: (64x1x49xf32) <- (64x49xf32, 1xi64) + unsqueeze_37 = paddle._C_ops.unsqueeze(reshape_218, full_int_array_8) + + # pd_op.unsqueeze: (64x49x1xf32) <- (64x49xf32, 1xi64) + unsqueeze_38 = paddle._C_ops.unsqueeze(reshape_218, full_int_array_0) + del reshape_218 + + # pd_op.subtract: (64x49x49xf32) <- (64x1x49xf32, 64x49x1xf32) + subtract_0 = paddle._C_ops.subtract(unsqueeze_37, unsqueeze_38) + del unsqueeze_37, unsqueeze_38 + + # pd_op.full: (xf32) <- () + full_35 = paddle._C_ops.full( + [], float("0"), paddle.float32, paddle.framework._current_expected_place() + ) + + # pd_op.not_equal: (64x49x49xb) <- (64x49x49xf32, xf32) + not_equal_0 = paddle._C_ops.not_equal(subtract_0, full_35) + + # pd_op.full: (64x49x49xf32) <- () + full_36 = paddle._C_ops.full( + [64, 49, 49], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (64x49x49xf32) <- (64x49x49xb, 64x49x49xf32, 64x49x49xf32) + where_0 = paddle._C_ops.where(not_equal_0, full_36, subtract_0) + del full_36, not_equal_0, subtract_0 + + # pd_op.equal: (64x49x49xb) <- (64x49x49xf32, xf32) + equal_0 = paddle._C_ops.equal(where_0, full_35) + + # pd_op.full: (64x49x49xf32) <- () + full_37 = paddle._C_ops.full( + [64, 49, 49], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (64x49x49xf32) <- (64x49x49xb, 64x49x49xf32, 64x49x49xf32) + where_1 = paddle._C_ops.where(equal_0, full_37, where_0) + del equal_0, full_37, where_0 + + # pd_op.shape64: (3xi64) <- (-1x49x192xf32) + shape64_8 = paddle._C_ops.shape64(reshape_11) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_34 = paddle._C_ops.slice( + shape64_8, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_8 + + # pd_op.matmul: (-1x49x576xf32) <- (-1x49x192xf32, 192x576xf32) + matmul_5 = paddle._C_ops.matmul(reshape_11, parameter_286, False, False) + del parameter_286 + + # pd_op.add: (-1x49x576xf32) <- (-1x49x576xf32, 576xf32) + add_7 = paddle._C_ops.add(matmul_5, parameter_285) + del parameter_285 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_7 = [slice_34, full_29, full_30, full_31, full_32] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_7 = paddle._C_ops.stack(combine_7, 0) + del combine_7 + + # pd_op.reshape: (-1x49x3x6x32xf32) <- (-1x49x576xf32, 5xi64) + reshape_219 = paddle._C_ops.reshape(add_7, stack_7) + del stack_7 + + # pd_op.transpose: (3x-1x6x49x32xf32) <- (-1x49x3x6x32xf32) + transpose_8 = paddle._C_ops.transpose(reshape_219, [2, 0, 3, 1, 4]) + del reshape_219 + + # pd_op.slice: (-1x6x49x32xf32) <- (3x-1x6x49x32xf32, 1xi64, 1xi64) + slice_35 = paddle._C_ops.slice( + transpose_8, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x6x49x32xf32) <- (3x-1x6x49x32xf32, 1xi64, 1xi64) + slice_36 = paddle._C_ops.slice( + transpose_8, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x6x49x32xf32) <- (3x-1x6x49x32xf32, 1xi64, 1xi64) + slice_1 = paddle._C_ops.slice( + transpose_8, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x6x49x32xf32) <- (-1x6x49x32xf32, 1xf32) + scale_1 = paddle._C_ops.scale(slice_35, full_0, float("0"), True) + del slice_35 + + # pd_op.transpose: (-1x6x32x49xf32) <- (-1x6x49x32xf32) + transpose_9 = paddle._C_ops.transpose(slice_36, [0, 1, 3, 2]) + del slice_36 + + # pd_op.matmul: (-1x6x49x49xf32) <- (-1x6x49x32xf32, -1x6x32x49xf32) + matmul_6 = paddle._C_ops.matmul(scale_1, transpose_9, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_12 = paddle._C_ops.reshape(data_3, full_int_array_12) + del data_3 + + # pd_op.index_select: (2401x6xf32) <- (169x6xf32, 2401xi64) + index_select_1 = paddle._C_ops.index_select(data_4, reshape_12, 0) + del data_4 + + # pd_op.reshape: (49x49x6xf32) <- (2401x6xf32, 3xi64) + reshape_220 = paddle._C_ops.reshape(index_select_1, full_int_array_13) + + # pd_op.transpose: (6x49x49xf32) <- (49x49x6xf32) + transpose_10 = paddle._C_ops.transpose(reshape_220, [2, 0, 1]) + del reshape_220 + + # pd_op.unsqueeze: (1x6x49x49xf32) <- (6x49x49xf32, 1xi64) + unsqueeze_1 = paddle._C_ops.unsqueeze(transpose_10, full_int_array_7) + + # pd_op.add: (-1x6x49x49xf32) <- (-1x6x49x49xf32, 1x6x49x49xf32) + add_8 = paddle._C_ops.add(matmul_6, unsqueeze_1) + + # pd_op.full: (xi64) <- () + full_38 = paddle._C_ops.full( + [], float("64"), paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_0 = paddle._C_ops.floor_divide(slice_34, full_38) + del full_38 + + # pd_op.full: (xi64) <- () + full_39 = paddle._C_ops.full( + [], float("64"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_8 = [floor_divide_0, full_39, full_31, full_29, full_29] + del floor_divide_0, full_39 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_8 = paddle._C_ops.stack(combine_8, 0) + del combine_8 + + # pd_op.reshape: (-1x64x6x49x49xf32) <- (-1x6x49x49xf32, 5xi64) + reshape_13 = paddle._C_ops.reshape(add_8, stack_8) + del stack_8 + + # pd_op.unsqueeze: (64x1x49x49xf32) <- (64x49x49xf32, 1xi64) + unsqueeze_39 = paddle._C_ops.unsqueeze(where_1, full_int_array_8) + del where_1 + + # pd_op.unsqueeze: (1x64x1x49x49xf32) <- (64x1x49x49xf32, 1xi64) + unsqueeze_2 = paddle._C_ops.unsqueeze(unsqueeze_39, full_int_array_7) + del unsqueeze_39 + + # pd_op.add: (-1x64x6x49x49xf32) <- (-1x64x6x49x49xf32, 1x64x1x49x49xf32) + add_9 = paddle._C_ops.add(reshape_13, unsqueeze_2) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_9 = [slice_34, full_31, full_29, full_29] + del full_31 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_9 = paddle._C_ops.stack(combine_9, 0) + del combine_9 + + # pd_op.reshape: (-1x6x49x49xf32) <- (-1x64x6x49x49xf32, 4xi64) + reshape_221 = paddle._C_ops.reshape(add_9, stack_9) + del stack_9 + + # pd_op.softmax: (-1x6x49x49xf32) <- (-1x6x49x49xf32) + softmax_1 = paddle._C_ops.softmax(reshape_221, -1) + del reshape_221 + + # pd_op.matmul: (-1x6x49x32xf32) <- (-1x6x49x49xf32, -1x6x49x32xf32) + matmul_125 = paddle._C_ops.matmul(softmax_1, slice_1, False, False) + + # pd_op.transpose: (-1x49x6x32xf32) <- (-1x6x49x32xf32) + transpose_11 = paddle._C_ops.transpose(matmul_125, [0, 2, 1, 3]) + del matmul_125 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_10 = [slice_34, full_29, full_26] + del slice_34 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_10 = paddle._C_ops.stack(combine_10, 0) + del combine_10 + + # pd_op.reshape: (-1x49x192xf32) <- (-1x49x6x32xf32, 3xi64) + reshape_14 = paddle._C_ops.reshape(transpose_11, stack_10) + del stack_10 + + # pd_op.matmul: (-1x49x192xf32) <- (-1x49x192xf32, 192x192xf32) + matmul_7 = paddle._C_ops.matmul(reshape_14, parameter_284, False, False) + del parameter_284 + + # pd_op.add: (-1x49x192xf32) <- (-1x49x192xf32, 192xf32) + add_10 = paddle._C_ops.add(matmul_7, parameter_283) + del parameter_283 + + # pd_op.reshape: (-1x7x7x192xf32) <- (-1x49x192xf32, 4xi64) + reshape_15 = paddle._C_ops.reshape(add_10, full_int_array_10) + del full_int_array_10 + + # pd_op.reshape: (-1x8x8x7x7x192xf32) <- (-1x7x7x192xf32, 6xi64) + reshape_222 = paddle._C_ops.reshape(reshape_15, full_int_array_14) + del full_int_array_14 + + # pd_op.transpose: (-1x8x7x8x7x192xf32) <- (-1x8x8x7x7x192xf32) + transpose_12 = paddle._C_ops.transpose(reshape_222, [0, 1, 3, 2, 4, 5]) + del reshape_222 + + # pd_op.reshape: (-1x56x56x192xf32) <- (-1x8x7x8x7x192xf32, 4xi64) + reshape_16 = paddle._C_ops.reshape(transpose_12, full_int_array_15) + del full_int_array_15 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_3 = [3, 3] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_267 = full_int_array_3 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_234 = full_int_array_3 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_214 = full_int_array_3 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_194 = full_int_array_3 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_174 = full_int_array_3 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_154 = full_int_array_3 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_134 = full_int_array_3 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_114 = full_int_array_3 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_94 = full_int_array_3 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_74 = full_int_array_3 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_41 = full_int_array_3 + + # pd_op.roll: (-1x56x56x192xf32) <- (-1x56x56x192xf32, 2xi64) + roll_1 = paddle._C_ops.roll(reshape_16, full_int_array_3, [1, 2]) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_11 = [slice_31, full_33, full_26] + del full_33, slice_31 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_11 = paddle._C_ops.stack(combine_11, 0) + del combine_11 + + # pd_op.reshape: (-1x3136x192xf32) <- (-1x56x56x192xf32, 3xi64) + reshape_17 = paddle._C_ops.reshape(roll_1, stack_11) + del stack_11 + + # pd_op.full: (xf32) <- () + full_1 = paddle._C_ops.full( + [], + float("0.995652"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_13 = full_1 + + # pd_op.shape64: (3xi64) <- (-1x3136x192xf32) + shape64_9 = paddle._C_ops.shape64(reshape_17) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_37 = paddle._C_ops.slice( + shape64_9, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_9 + + # pd_op.full: (xi64) <- () + full_40 = paddle._C_ops.full( + [], float("1"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_12 = [slice_37, full_40, full_40] + del slice_37 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_12 = paddle._C_ops.stack(combine_12, 0) + del combine_12 + + # pd_op.full: (1xf32) <- () + full_41 = paddle._C_ops.full( + [1], float("0"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.full: (1xf32) <- () + full_42 = paddle._C_ops.full( + [1], float("1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_0 = paddle._C_ops.uniform( + stack_12, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_12 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_171 = paddle._C_ops.add(full_1, uniform_0) + del uniform_0 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_0 = paddle._C_ops.floor(add_171) + del add_171 + + # pd_op.divide: (-1x3136x192xf32) <- (-1x3136x192xf32, xf32) + divide_0 = paddle._C_ops.divide(reshape_17, full_1) + + # pd_op.multiply: (-1x3136x192xf32) <- (-1x3136x192xf32, -1x1x1xf32) + multiply_0 = paddle._C_ops.multiply(divide_0, floor_0) + + # pd_op.add: (-1x3136x192xf32) <- (-1x3136x192xf32, -1x3136x192xf32) + add_11 = paddle._C_ops.add(add_6, multiply_0) + + # pd_op.layer_norm: (-1x3136x192xf32, -1x3136xf32, -1x3136xf32) <- (-1x3136x192xf32, 192xf32, 192xf32) + layer_norm_12, layer_norm_13, layer_norm_14 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_11, parameter_282, parameter_281, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_281, parameter_282 + + # pd_op.matmul: (-1x3136x768xf32) <- (-1x3136x192xf32, 192x768xf32) + matmul_8 = paddle._C_ops.matmul(layer_norm_12, parameter_280, False, False) + del parameter_280 + + # pd_op.add: (-1x3136x768xf32) <- (-1x3136x768xf32, 768xf32) + add_12 = paddle._C_ops.add(matmul_8, parameter_279) + del parameter_279 + + # pd_op.gelu: (-1x3136x768xf32) <- (-1x3136x768xf32) + gelu_1 = paddle._C_ops.gelu(add_12, False) + + # pd_op.matmul: (-1x3136x192xf32) <- (-1x3136x768xf32, 768x192xf32) + matmul_9 = paddle._C_ops.matmul(gelu_1, parameter_278, False, False) + del parameter_278 + + # pd_op.add: (-1x3136x192xf32) <- (-1x3136x192xf32, 192xf32) + add_13 = paddle._C_ops.add(matmul_9, parameter_277) + del parameter_277 + + # pd_op.shape64: (3xi64) <- (-1x3136x192xf32) + shape64_10 = paddle._C_ops.shape64(add_13) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_38 = paddle._C_ops.slice( + shape64_10, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_10 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_13 = [slice_38, full_40, full_40] + del slice_38 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_13 = paddle._C_ops.stack(combine_13, 0) + del combine_13 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_1 = paddle._C_ops.uniform( + stack_13, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_13 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_172 = paddle._C_ops.add(full_1, uniform_1) + del uniform_1 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_1 = paddle._C_ops.floor(add_172) + del add_172 + + # pd_op.divide: (-1x3136x192xf32) <- (-1x3136x192xf32, xf32) + divide_1 = paddle._C_ops.divide(add_13, full_1) + + # pd_op.multiply: (-1x3136x192xf32) <- (-1x3136x192xf32, -1x1x1xf32) + multiply_1 = paddle._C_ops.multiply(divide_1, floor_1) + + # pd_op.add: (-1x3136x192xf32) <- (-1x3136x192xf32, -1x3136x192xf32) + add_14 = paddle._C_ops.add(add_11, multiply_1) + + # pd_op.shape64: (3xi64) <- (-1x3136x192xf32) + shape64_11 = paddle._C_ops.shape64(add_14) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_39 = paddle._C_ops.slice( + shape64_11, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_11 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_14 = [slice_39, full_25, full_25, full_26] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_14 = paddle._C_ops.stack(combine_14, 0) + del combine_14 + + # pd_op.reshape: (-1x56x56x192xf32) <- (-1x3136x192xf32, 4xi64) + reshape_18 = paddle._C_ops.reshape(add_14, stack_14) + del stack_14 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_4 = [2, 2] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_247 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_244 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_241 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_238 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_54 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_51 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_48 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_45 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_22 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_19 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_17 = full_int_array_4 + + # pd_op.strided_slice: (-1x28x28x192xf32) <- (-1x56x56x192xf32, 2xi64, 2xi64, 2xi64) + strided_slice_0 = paddle._C_ops.strided_slice( + reshape_18, [1, 2], full_int_array_16, full_int_array_29, full_int_array_4 + ) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_5 = [1, 0] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_239 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_46 = full_int_array_5 + + # pd_op.strided_slice: (-1x28x28x192xf32) <- (-1x56x56x192xf32, 2xi64, 2xi64, 2xi64) + strided_slice_1 = paddle._C_ops.strided_slice( + reshape_18, [1, 2], full_int_array_5, full_int_array_29, full_int_array_4 + ) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_6 = [0, 1] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_242 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_49 = full_int_array_6 + + # pd_op.strided_slice: (-1x28x28x192xf32) <- (-1x56x56x192xf32, 2xi64, 2xi64, 2xi64) + strided_slice_2 = paddle._C_ops.strided_slice( + reshape_18, [1, 2], full_int_array_6, full_int_array_29, full_int_array_4 + ) + + # pd_op.strided_slice: (-1x28x28x192xf32) <- (-1x56x56x192xf32, 2xi64, 2xi64, 2xi64) + strided_slice_3 = paddle._C_ops.strided_slice( + reshape_18, [1, 2], full_int_array_18, full_int_array_29, full_int_array_4 + ) + + # pd_op.shape64: (4xi64) <- (-1x56x56x192xf32) + shape64_12 = paddle._C_ops.shape64(reshape_18) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_40 = paddle._C_ops.slice( + shape64_12, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_12 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_15 = [slice_40, full_25, full_25, full_26] + del full_25, full_26, slice_40 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_15 = paddle._C_ops.stack(combine_15, 0) + del combine_15 + + # pd_op.reshape: (-1x56x56x192xf32) <- (-1x56x56x192xf32, 4xi64) + reshape_223 = paddle._C_ops.reshape(reshape_18, stack_15) + del stack_15 + + # pd_op.full: (1xi32) <- () + full_2 = paddle._C_ops.full( + [1], float("-1"), paddle.int32, paddle.core.CPUPlace() + ) + + # pd_op.assign: (1xi32) <- (1xi32) + assign_248 = full_2 + + # pd_op.assign: (1xi32) <- (1xi32) + assign_55 = full_2 + + # builtin.combine: ([-1x28x28x192xf32, -1x28x28x192xf32, -1x28x28x192xf32, -1x28x28x192xf32]) <- (-1x28x28x192xf32, -1x28x28x192xf32, -1x28x28x192xf32, -1x28x28x192xf32) + combine_16 = [ + strided_slice_0, + strided_slice_1, + strided_slice_2, + strided_slice_3, + ] + + # pd_op.concat: (-1x28x28x768xf32) <- ([-1x28x28x192xf32, -1x28x28x192xf32, -1x28x28x192xf32, -1x28x28x192xf32], 1xi32) + concat_0 = paddle._C_ops.concat(combine_16, full_2) + del combine_16 + + # pd_op.full: (xi64) <- () + full_43 = paddle._C_ops.full( + [], float("-1"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_44 = paddle._C_ops.full( + [], float("768"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_17 = [slice_39, full_43, full_44] + del slice_39 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_16 = paddle._C_ops.stack(combine_17, 0) + del combine_17 + + # pd_op.reshape: (-1x-1x768xf32) <- (-1x28x28x768xf32, 3xi64) + reshape_19 = paddle._C_ops.reshape(concat_0, stack_16) + del stack_16 + + # pd_op.layer_norm: (-1x-1x768xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x768xf32, 768xf32, 768xf32) + layer_norm_15, layer_norm_16, layer_norm_17 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + reshape_19, parameter_276, parameter_275, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_275, parameter_276 + + # pd_op.matmul: (-1x-1x384xf32) <- (-1x-1x768xf32, 768x384xf32) + matmul_10 = paddle._C_ops.matmul(layer_norm_15, parameter_274, False, False) + del parameter_274 + + # pd_op.shape64: (3xi64) <- (-1x-1x384xf32) + shape64_13 = paddle._C_ops.shape64(matmul_10) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_41 = paddle._C_ops.slice( + shape64_13, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_13 + + # pd_op.shape64: (3xi64) <- (-1x-1x384xf32) + shape64_14 = paddle._C_ops.shape64(matmul_10) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_42 = paddle._C_ops.slice( + shape64_14, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + del shape64_14 + + # pd_op.layer_norm: (-1x-1x384xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x384xf32, 384xf32, 384xf32) + layer_norm_18, layer_norm_19, layer_norm_20 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + matmul_10, parameter_273, parameter_272, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_272, parameter_273 + + # pd_op.full: (xi64) <- () + full_45 = paddle._C_ops.full( + [], float("28"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_46 = paddle._C_ops.full( + [], float("384"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_18 = [slice_41, full_45, full_45, full_46] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_17 = paddle._C_ops.stack(combine_18, 0) + del combine_18 + + # pd_op.reshape: (-1x28x28x384xf32) <- (-1x-1x384xf32, 4xi64) + reshape_20 = paddle._C_ops.reshape(layer_norm_18, stack_17) + del stack_17 + + # pd_op.shape64: (4xi64) <- (-1x28x28x384xf32) + shape64_15 = paddle._C_ops.shape64(reshape_20) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_43 = paddle._C_ops.slice( + shape64_15, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_15 + + # pd_op.full: (xi64) <- () + full_47 = paddle._C_ops.full( + [], float("4"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_19 = [slice_43, full_47, full_28, full_47, full_28, full_46] + del slice_43 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_18 = paddle._C_ops.stack(combine_19, 0) + del combine_19 + + # pd_op.reshape: (-1x4x7x4x7x384xf32) <- (-1x28x28x384xf32, 6xi64) + reshape_224 = paddle._C_ops.reshape(reshape_20, stack_18) + del stack_18 + + # pd_op.transpose: (-1x4x4x7x7x384xf32) <- (-1x4x7x4x7x384xf32) + transpose_13 = paddle._C_ops.transpose(reshape_224, [0, 1, 3, 2, 4, 5]) + del reshape_224 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_33 = [-1, 7, 7, 384] + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x4x4x7x7x384xf32, 4xi64) + reshape_21 = paddle._C_ops.reshape(transpose_13, full_int_array_33) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_34 = [-1, 49, 384] + + # pd_op.reshape: (-1x49x384xf32) <- (-1x7x7x384xf32, 3xi64) + reshape_22 = paddle._C_ops.reshape(reshape_21, full_int_array_34) + + # pd_op.shape64: (3xi64) <- (-1x49x384xf32) + shape64_16 = paddle._C_ops.shape64(reshape_22) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_44 = paddle._C_ops.slice( + shape64_16, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_16 + + # pd_op.matmul: (-1x49x1152xf32) <- (-1x49x384xf32, 384x1152xf32) + matmul_11 = paddle._C_ops.matmul(reshape_22, parameter_271, False, False) + del parameter_271 + + # pd_op.add: (-1x49x1152xf32) <- (-1x49x1152xf32, 1152xf32) + add_15 = paddle._C_ops.add(matmul_11, parameter_270) + del parameter_270 + + # pd_op.full: (xi64) <- () + full_48 = paddle._C_ops.full( + [], float("12"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_20 = [slice_44, full_29, full_30, full_48, full_32] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_19 = paddle._C_ops.stack(combine_20, 0) + del combine_20 + + # pd_op.reshape: (-1x49x3x12x32xf32) <- (-1x49x1152xf32, 5xi64) + reshape_225 = paddle._C_ops.reshape(add_15, stack_19) + del stack_19 + + # pd_op.transpose: (3x-1x12x49x32xf32) <- (-1x49x3x12x32xf32) + transpose_14 = paddle._C_ops.transpose(reshape_225, [2, 0, 3, 1, 4]) + del reshape_225 + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_45 = paddle._C_ops.slice( + transpose_14, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_46 = paddle._C_ops.slice( + transpose_14, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_2 = paddle._C_ops.slice( + transpose_14, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x12x49x32xf32) <- (-1x12x49x32xf32, 1xf32) + scale_2 = paddle._C_ops.scale(slice_45, full_0, float("0"), True) + del slice_45 + + # pd_op.transpose: (-1x12x32x49xf32) <- (-1x12x49x32xf32) + transpose_15 = paddle._C_ops.transpose(slice_46, [0, 1, 3, 2]) + del slice_46 + + # pd_op.matmul: (-1x12x49x49xf32) <- (-1x12x49x32xf32, -1x12x32x49xf32) + matmul_12 = paddle._C_ops.matmul(scale_2, transpose_15, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_23 = paddle._C_ops.reshape(data_5, full_int_array_12) + del data_5 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_2 = paddle._C_ops.index_select(data_6, reshape_23, 0) + del data_6 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_226 = paddle._C_ops.reshape(index_select_2, full_int_array_13) + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_16 = paddle._C_ops.transpose(reshape_226, [2, 0, 1]) + del reshape_226 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_3 = paddle._C_ops.unsqueeze(transpose_16, full_int_array_7) + + # pd_op.add: (-1x12x49x49xf32) <- (-1x12x49x49xf32, 1x12x49x49xf32) + add_173 = paddle._C_ops.add(matmul_12, unsqueeze_3) + + # pd_op.softmax: (-1x12x49x49xf32) <- (-1x12x49x49xf32) + softmax_2 = paddle._C_ops.softmax(add_173, -1) + del add_173 + + # pd_op.matmul: (-1x12x49x32xf32) <- (-1x12x49x49xf32, -1x12x49x32xf32) + matmul_126 = paddle._C_ops.matmul(softmax_2, slice_2, False, False) + + # pd_op.transpose: (-1x49x12x32xf32) <- (-1x12x49x32xf32) + transpose_17 = paddle._C_ops.transpose(matmul_126, [0, 2, 1, 3]) + del matmul_126 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_21 = [slice_44, full_29, full_46] + del slice_44 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_20 = paddle._C_ops.stack(combine_21, 0) + del combine_21 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x49x12x32xf32, 3xi64) + reshape_24 = paddle._C_ops.reshape(transpose_17, stack_20) + del stack_20 + + # pd_op.matmul: (-1x49x384xf32) <- (-1x49x384xf32, 384x384xf32) + matmul_13 = paddle._C_ops.matmul(reshape_24, parameter_269, False, False) + del parameter_269 + + # pd_op.add: (-1x49x384xf32) <- (-1x49x384xf32, 384xf32) + add_16 = paddle._C_ops.add(matmul_13, parameter_268) + del parameter_268 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x49x384xf32, 4xi64) + reshape_25 = paddle._C_ops.reshape(add_16, full_int_array_33) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_35 = [-1, 4, 4, 7, 7, 384] + + # pd_op.reshape: (-1x4x4x7x7x384xf32) <- (-1x7x7x384xf32, 6xi64) + reshape_227 = paddle._C_ops.reshape(reshape_25, full_int_array_35) + + # pd_op.transpose: (-1x4x7x4x7x384xf32) <- (-1x4x4x7x7x384xf32) + transpose_18 = paddle._C_ops.transpose(reshape_227, [0, 1, 3, 2, 4, 5]) + del reshape_227 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_36 = [-1, 28, 28, 384] + + # pd_op.reshape: (-1x28x28x384xf32) <- (-1x4x7x4x7x384xf32, 4xi64) + reshape_26 = paddle._C_ops.reshape(transpose_18, full_int_array_36) + + # pd_op.full: (xi64) <- () + full_49 = paddle._C_ops.full( + [], float("784"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_22 = [slice_41, full_49, full_46] + del slice_41 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_21 = paddle._C_ops.stack(combine_22, 0) + del combine_22 + + # pd_op.reshape: (-1x784x384xf32) <- (-1x28x28x384xf32, 3xi64) + reshape_27 = paddle._C_ops.reshape(reshape_26, stack_21) + del stack_21 + + # pd_op.full: (xf32) <- () + full_3 = paddle._C_ops.full( + [], + float("0.991304"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_31 = full_3 + + # pd_op.shape64: (3xi64) <- (-1x784x384xf32) + shape64_17 = paddle._C_ops.shape64(reshape_27) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_47 = paddle._C_ops.slice( + shape64_17, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_17 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_23 = [slice_47, full_40, full_40] + del slice_47 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_22 = paddle._C_ops.stack(combine_23, 0) + del combine_23 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_2 = paddle._C_ops.uniform( + stack_22, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_22 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_174 = paddle._C_ops.add(full_3, uniform_2) + del uniform_2 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_2 = paddle._C_ops.floor(add_174) + del add_174 + + # pd_op.divide: (-1x784x384xf32) <- (-1x784x384xf32, xf32) + divide_2 = paddle._C_ops.divide(reshape_27, full_3) + + # pd_op.multiply: (-1x784x384xf32) <- (-1x784x384xf32, -1x1x1xf32) + multiply_2 = paddle._C_ops.multiply(divide_2, floor_2) + + # pd_op.add: (-1x784x384xf32) <- (-1x-1x384xf32, -1x784x384xf32) + add_17 = paddle._C_ops.add(matmul_10, multiply_2) + + # pd_op.layer_norm: (-1x784x384xf32, -1x784xf32, -1x784xf32) <- (-1x784x384xf32, 384xf32, 384xf32) + layer_norm_21, layer_norm_22, layer_norm_23 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_17, parameter_267, parameter_266, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_266, parameter_267 + + # pd_op.matmul: (-1x784x1536xf32) <- (-1x784x384xf32, 384x1536xf32) + matmul_14 = paddle._C_ops.matmul(layer_norm_21, parameter_265, False, False) + del parameter_265 + + # pd_op.add: (-1x784x1536xf32) <- (-1x784x1536xf32, 1536xf32) + add_18 = paddle._C_ops.add(matmul_14, parameter_264) + del parameter_264 + + # pd_op.gelu: (-1x784x1536xf32) <- (-1x784x1536xf32) + gelu_2 = paddle._C_ops.gelu(add_18, False) + + # pd_op.matmul: (-1x784x384xf32) <- (-1x784x1536xf32, 1536x384xf32) + matmul_15 = paddle._C_ops.matmul(gelu_2, parameter_263, False, False) + del parameter_263 + + # pd_op.add: (-1x784x384xf32) <- (-1x784x384xf32, 384xf32) + add_19 = paddle._C_ops.add(matmul_15, parameter_262) + del parameter_262 + + # pd_op.shape64: (3xi64) <- (-1x784x384xf32) + shape64_18 = paddle._C_ops.shape64(add_19) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_48 = paddle._C_ops.slice( + shape64_18, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_18 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_24 = [slice_48, full_40, full_40] + del slice_48 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_23 = paddle._C_ops.stack(combine_24, 0) + del combine_24 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_3 = paddle._C_ops.uniform( + stack_23, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_23 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_175 = paddle._C_ops.add(full_3, uniform_3) + del uniform_3 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_3 = paddle._C_ops.floor(add_175) + del add_175 + + # pd_op.divide: (-1x784x384xf32) <- (-1x784x384xf32, xf32) + divide_3 = paddle._C_ops.divide(add_19, full_3) + + # pd_op.multiply: (-1x784x384xf32) <- (-1x784x384xf32, -1x1x1xf32) + multiply_3 = paddle._C_ops.multiply(divide_3, floor_3) + + # pd_op.add: (-1x784x384xf32) <- (-1x784x384xf32, -1x784x384xf32) + add_20 = paddle._C_ops.add(add_17, multiply_3) + + # pd_op.shape64: (3xi64) <- (-1x784x384xf32) + shape64_19 = paddle._C_ops.shape64(add_20) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_49 = paddle._C_ops.slice( + shape64_19, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_19 + + # pd_op.layer_norm: (-1x784x384xf32, -1x784xf32, -1x784xf32) <- (-1x784x384xf32, 384xf32, 384xf32) + layer_norm_24, layer_norm_25, layer_norm_26 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_20, parameter_261, parameter_260, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_260, parameter_261 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_25 = [slice_49, full_45, full_45, full_46] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_24 = paddle._C_ops.stack(combine_25, 0) + del combine_25 + + # pd_op.reshape: (-1x28x28x384xf32) <- (-1x784x384xf32, 4xi64) + reshape_28 = paddle._C_ops.reshape(layer_norm_24, stack_24) + del stack_24 + + # pd_op.shape64: (4xi64) <- (-1x28x28x384xf32) + shape64_20 = paddle._C_ops.shape64(reshape_28) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_50 = paddle._C_ops.slice( + shape64_20, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_20 + + # pd_op.roll: (-1x28x28x384xf32) <- (-1x28x28x384xf32, 2xi64) + roll_2 = paddle._C_ops.roll(reshape_28, full_int_array_2, [1, 2]) + + # pd_op.shape64: (4xi64) <- (-1x28x28x384xf32) + shape64_21 = paddle._C_ops.shape64(roll_2) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_51 = paddle._C_ops.slice( + shape64_21, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_21 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_26 = [slice_51, full_47, full_28, full_47, full_28, full_46] + del slice_51 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_25 = paddle._C_ops.stack(combine_26, 0) + del combine_26 + + # pd_op.reshape: (-1x4x7x4x7x384xf32) <- (-1x28x28x384xf32, 6xi64) + reshape_228 = paddle._C_ops.reshape(roll_2, stack_25) + del stack_25 + + # pd_op.transpose: (-1x4x4x7x7x384xf32) <- (-1x4x7x4x7x384xf32) + transpose_19 = paddle._C_ops.transpose(reshape_228, [0, 1, 3, 2, 4, 5]) + del reshape_228 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x4x4x7x7x384xf32, 4xi64) + reshape_29 = paddle._C_ops.reshape(transpose_19, full_int_array_33) + + # pd_op.reshape: (-1x49x384xf32) <- (-1x7x7x384xf32, 3xi64) + reshape_30 = paddle._C_ops.reshape(reshape_29, full_int_array_34) + del full_int_array_34 + + # pd_op.full: (1x28x28x1xf32) <- () + full_50 = paddle._C_ops.full( + [1, 28, 28, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__20 = paddle._C_ops.set_value_( + full_50, + full_int_array_16, + full_int_array_17, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_50 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__21 = paddle._C_ops.set_value_( + set_value__20, + full_int_array_19, + full_int_array_20, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__20 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__22 = paddle._C_ops.set_value_( + set_value__21, + full_int_array_21, + full_int_array_22, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__21 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__23 = paddle._C_ops.set_value_( + set_value__22, + full_int_array_23, + full_int_array_24, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__22 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__24 = paddle._C_ops.set_value_( + set_value__23, + full_int_array_17, + full_int_array_2, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__23 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__25 = paddle._C_ops.set_value_( + set_value__24, + full_int_array_20, + full_int_array_25, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__24 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__26 = paddle._C_ops.set_value_( + set_value__25, + full_int_array_26, + full_int_array_27, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__25 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__27 = paddle._C_ops.set_value_( + set_value__26, + full_int_array_24, + full_int_array_28, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__26 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__1 = paddle._C_ops.set_value_( + set_value__27, + full_int_array_2, + full_int_array_29, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__27 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_37 = [1, 4, 7, 4, 7, 1] + + # pd_op.reshape: (1x4x7x4x7x1xf32) <- (1x28x28x1xf32, 6xi64) + reshape_229 = paddle._C_ops.reshape(set_value__1, full_int_array_37) + del full_int_array_37 + + # pd_op.transpose: (1x4x4x7x7x1xf32) <- (1x4x7x4x7x1xf32) + transpose_147 = paddle._C_ops.transpose(reshape_229, [0, 1, 3, 2, 4, 5]) + del reshape_229 + + # pd_op.reshape: (16x7x7x1xf32) <- (1x4x4x7x7x1xf32, 4xi64) + reshape_230 = paddle._C_ops.reshape(transpose_147, full_int_array_31) + del transpose_147 + + # pd_op.reshape: (16x49xf32) <- (16x7x7x1xf32, 2xi64) + reshape_231 = paddle._C_ops.reshape(reshape_230, full_int_array_32) + del reshape_230 + + # pd_op.unsqueeze: (16x1x49xf32) <- (16x49xf32, 1xi64) + unsqueeze_40 = paddle._C_ops.unsqueeze(reshape_231, full_int_array_8) + + # pd_op.unsqueeze: (16x49x1xf32) <- (16x49xf32, 1xi64) + unsqueeze_41 = paddle._C_ops.unsqueeze(reshape_231, full_int_array_0) + del reshape_231 + + # pd_op.subtract: (16x49x49xf32) <- (16x1x49xf32, 16x49x1xf32) + subtract_1 = paddle._C_ops.subtract(unsqueeze_40, unsqueeze_41) + del unsqueeze_40, unsqueeze_41 + + # pd_op.not_equal: (16x49x49xb) <- (16x49x49xf32, xf32) + not_equal_1 = paddle._C_ops.not_equal(subtract_1, full_35) + + # pd_op.full: (16x49x49xf32) <- () + full_51 = paddle._C_ops.full( + [16, 49, 49], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (16x49x49xf32) <- (16x49x49xb, 16x49x49xf32, 16x49x49xf32) + where_2 = paddle._C_ops.where(not_equal_1, full_51, subtract_1) + del full_51, not_equal_1, subtract_1 + + # pd_op.equal: (16x49x49xb) <- (16x49x49xf32, xf32) + equal_1 = paddle._C_ops.equal(where_2, full_35) + + # pd_op.full: (16x49x49xf32) <- () + full_52 = paddle._C_ops.full( + [16, 49, 49], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (16x49x49xf32) <- (16x49x49xb, 16x49x49xf32, 16x49x49xf32) + where_3 = paddle._C_ops.where(equal_1, full_52, where_2) + del equal_1, full_52, where_2 + + # pd_op.shape64: (3xi64) <- (-1x49x384xf32) + shape64_22 = paddle._C_ops.shape64(reshape_30) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_52 = paddle._C_ops.slice( + shape64_22, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_22 + + # pd_op.matmul: (-1x49x1152xf32) <- (-1x49x384xf32, 384x1152xf32) + matmul_16 = paddle._C_ops.matmul(reshape_30, parameter_259, False, False) + del parameter_259 + + # pd_op.add: (-1x49x1152xf32) <- (-1x49x1152xf32, 1152xf32) + add_21 = paddle._C_ops.add(matmul_16, parameter_258) + del parameter_258 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_27 = [slice_52, full_29, full_30, full_48, full_32] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_26 = paddle._C_ops.stack(combine_27, 0) + del combine_27 + + # pd_op.reshape: (-1x49x3x12x32xf32) <- (-1x49x1152xf32, 5xi64) + reshape_232 = paddle._C_ops.reshape(add_21, stack_26) + del stack_26 + + # pd_op.transpose: (3x-1x12x49x32xf32) <- (-1x49x3x12x32xf32) + transpose_20 = paddle._C_ops.transpose(reshape_232, [2, 0, 3, 1, 4]) + del reshape_232 + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_53 = paddle._C_ops.slice( + transpose_20, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_54 = paddle._C_ops.slice( + transpose_20, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_3 = paddle._C_ops.slice( + transpose_20, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x12x49x32xf32) <- (-1x12x49x32xf32, 1xf32) + scale_3 = paddle._C_ops.scale(slice_53, full_0, float("0"), True) + del slice_53 + + # pd_op.transpose: (-1x12x32x49xf32) <- (-1x12x49x32xf32) + transpose_21 = paddle._C_ops.transpose(slice_54, [0, 1, 3, 2]) + del slice_54 + + # pd_op.matmul: (-1x12x49x49xf32) <- (-1x12x49x32xf32, -1x12x32x49xf32) + matmul_17 = paddle._C_ops.matmul(scale_3, transpose_21, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_31 = paddle._C_ops.reshape(data_7, full_int_array_12) + del data_7 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_3 = paddle._C_ops.index_select(data_8, reshape_31, 0) + del data_8 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_233 = paddle._C_ops.reshape(index_select_3, full_int_array_13) + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_22 = paddle._C_ops.transpose(reshape_233, [2, 0, 1]) + del reshape_233 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_4 = paddle._C_ops.unsqueeze(transpose_22, full_int_array_7) + + # pd_op.add: (-1x12x49x49xf32) <- (-1x12x49x49xf32, 1x12x49x49xf32) + add_22 = paddle._C_ops.add(matmul_17, unsqueeze_4) + + # pd_op.full: (xi64) <- () + full_53 = paddle._C_ops.full( + [], float("16"), paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_1 = paddle._C_ops.floor_divide(slice_52, full_53) + del full_53 + + # pd_op.full: (xi64) <- () + full_54 = paddle._C_ops.full( + [], float("16"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_28 = [floor_divide_1, full_54, full_48, full_29, full_29] + del floor_divide_1, full_54 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_27 = paddle._C_ops.stack(combine_28, 0) + del combine_28 + + # pd_op.reshape: (-1x16x12x49x49xf32) <- (-1x12x49x49xf32, 5xi64) + reshape_32 = paddle._C_ops.reshape(add_22, stack_27) + del stack_27 + + # pd_op.unsqueeze: (16x1x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_42 = paddle._C_ops.unsqueeze(where_3, full_int_array_8) + del where_3 + + # pd_op.unsqueeze: (1x16x1x49x49xf32) <- (16x1x49x49xf32, 1xi64) + unsqueeze_5 = paddle._C_ops.unsqueeze(unsqueeze_42, full_int_array_7) + del unsqueeze_42 + + # pd_op.add: (-1x16x12x49x49xf32) <- (-1x16x12x49x49xf32, 1x16x1x49x49xf32) + add_23 = paddle._C_ops.add(reshape_32, unsqueeze_5) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_29 = [slice_52, full_48, full_29, full_29] + del full_48 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_28 = paddle._C_ops.stack(combine_29, 0) + del combine_29 + + # pd_op.reshape: (-1x12x49x49xf32) <- (-1x16x12x49x49xf32, 4xi64) + reshape_234 = paddle._C_ops.reshape(add_23, stack_28) + del stack_28 + + # pd_op.softmax: (-1x12x49x49xf32) <- (-1x12x49x49xf32) + softmax_3 = paddle._C_ops.softmax(reshape_234, -1) + del reshape_234 + + # pd_op.matmul: (-1x12x49x32xf32) <- (-1x12x49x49xf32, -1x12x49x32xf32) + matmul_127 = paddle._C_ops.matmul(softmax_3, slice_3, False, False) + + # pd_op.transpose: (-1x49x12x32xf32) <- (-1x12x49x32xf32) + transpose_23 = paddle._C_ops.transpose(matmul_127, [0, 2, 1, 3]) + del matmul_127 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_30 = [slice_52, full_29, full_46] + del slice_52 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_29 = paddle._C_ops.stack(combine_30, 0) + del combine_30 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x49x12x32xf32, 3xi64) + reshape_33 = paddle._C_ops.reshape(transpose_23, stack_29) + del stack_29 + + # pd_op.matmul: (-1x49x384xf32) <- (-1x49x384xf32, 384x384xf32) + matmul_18 = paddle._C_ops.matmul(reshape_33, parameter_257, False, False) + del parameter_257 + + # pd_op.add: (-1x49x384xf32) <- (-1x49x384xf32, 384xf32) + add_24 = paddle._C_ops.add(matmul_18, parameter_256) + del parameter_256 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x49x384xf32, 4xi64) + reshape_34 = paddle._C_ops.reshape(add_24, full_int_array_33) + del full_int_array_33 + + # pd_op.reshape: (-1x4x4x7x7x384xf32) <- (-1x7x7x384xf32, 6xi64) + reshape_235 = paddle._C_ops.reshape(reshape_34, full_int_array_35) + del full_int_array_35 + + # pd_op.transpose: (-1x4x7x4x7x384xf32) <- (-1x4x4x7x7x384xf32) + transpose_24 = paddle._C_ops.transpose(reshape_235, [0, 1, 3, 2, 4, 5]) + del reshape_235 + + # pd_op.reshape: (-1x28x28x384xf32) <- (-1x4x7x4x7x384xf32, 4xi64) + reshape_35 = paddle._C_ops.reshape(transpose_24, full_int_array_36) + del full_int_array_36 + + # pd_op.roll: (-1x28x28x384xf32) <- (-1x28x28x384xf32, 2xi64) + roll_3 = paddle._C_ops.roll(reshape_35, full_int_array_3, [1, 2]) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_31 = [slice_49, full_49, full_46] + del full_49, slice_49 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_30 = paddle._C_ops.stack(combine_31, 0) + del combine_31 + + # pd_op.reshape: (-1x784x384xf32) <- (-1x28x28x384xf32, 3xi64) + reshape_36 = paddle._C_ops.reshape(roll_3, stack_30) + del stack_30 + + # pd_op.full: (xf32) <- () + full_4 = paddle._C_ops.full( + [], + float("0.986957"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_42 = full_4 + + # pd_op.shape64: (3xi64) <- (-1x784x384xf32) + shape64_23 = paddle._C_ops.shape64(reshape_36) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_55 = paddle._C_ops.slice( + shape64_23, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_23 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_32 = [slice_55, full_40, full_40] + del slice_55 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_31 = paddle._C_ops.stack(combine_32, 0) + del combine_32 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_4 = paddle._C_ops.uniform( + stack_31, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_31 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_176 = paddle._C_ops.add(full_4, uniform_4) + del uniform_4 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_4 = paddle._C_ops.floor(add_176) + del add_176 + + # pd_op.divide: (-1x784x384xf32) <- (-1x784x384xf32, xf32) + divide_4 = paddle._C_ops.divide(reshape_36, full_4) + + # pd_op.multiply: (-1x784x384xf32) <- (-1x784x384xf32, -1x1x1xf32) + multiply_4 = paddle._C_ops.multiply(divide_4, floor_4) + + # pd_op.add: (-1x784x384xf32) <- (-1x784x384xf32, -1x784x384xf32) + add_25 = paddle._C_ops.add(add_20, multiply_4) + + # pd_op.layer_norm: (-1x784x384xf32, -1x784xf32, -1x784xf32) <- (-1x784x384xf32, 384xf32, 384xf32) + layer_norm_27, layer_norm_28, layer_norm_29 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_25, parameter_255, parameter_254, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_254, parameter_255 + + # pd_op.matmul: (-1x784x1536xf32) <- (-1x784x384xf32, 384x1536xf32) + matmul_19 = paddle._C_ops.matmul(layer_norm_27, parameter_253, False, False) + del parameter_253 + + # pd_op.add: (-1x784x1536xf32) <- (-1x784x1536xf32, 1536xf32) + add_26 = paddle._C_ops.add(matmul_19, parameter_252) + del parameter_252 + + # pd_op.gelu: (-1x784x1536xf32) <- (-1x784x1536xf32) + gelu_3 = paddle._C_ops.gelu(add_26, False) + + # pd_op.matmul: (-1x784x384xf32) <- (-1x784x1536xf32, 1536x384xf32) + matmul_20 = paddle._C_ops.matmul(gelu_3, parameter_251, False, False) + del parameter_251 + + # pd_op.add: (-1x784x384xf32) <- (-1x784x384xf32, 384xf32) + add_27 = paddle._C_ops.add(matmul_20, parameter_250) + del parameter_250 + + # pd_op.shape64: (3xi64) <- (-1x784x384xf32) + shape64_24 = paddle._C_ops.shape64(add_27) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_56 = paddle._C_ops.slice( + shape64_24, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_24 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_33 = [slice_56, full_40, full_40] + del slice_56 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_32 = paddle._C_ops.stack(combine_33, 0) + del combine_33 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_5 = paddle._C_ops.uniform( + stack_32, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_32 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_177 = paddle._C_ops.add(full_4, uniform_5) + del uniform_5 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_5 = paddle._C_ops.floor(add_177) + del add_177 + + # pd_op.divide: (-1x784x384xf32) <- (-1x784x384xf32, xf32) + divide_5 = paddle._C_ops.divide(add_27, full_4) + + # pd_op.multiply: (-1x784x384xf32) <- (-1x784x384xf32, -1x1x1xf32) + multiply_5 = paddle._C_ops.multiply(divide_5, floor_5) + + # pd_op.add: (-1x784x384xf32) <- (-1x784x384xf32, -1x784x384xf32) + add_28 = paddle._C_ops.add(add_25, multiply_5) + + # pd_op.shape64: (3xi64) <- (-1x784x384xf32) + shape64_25 = paddle._C_ops.shape64(add_28) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_57 = paddle._C_ops.slice( + shape64_25, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_25 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_34 = [slice_57, full_45, full_45, full_46] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_33 = paddle._C_ops.stack(combine_34, 0) + del combine_34 + + # pd_op.reshape: (-1x28x28x384xf32) <- (-1x784x384xf32, 4xi64) + reshape_37 = paddle._C_ops.reshape(add_28, stack_33) + del stack_33 + + # pd_op.strided_slice: (-1x14x14x384xf32) <- (-1x28x28x384xf32, 2xi64, 2xi64, 2xi64) + strided_slice_4 = paddle._C_ops.strided_slice( + reshape_37, [1, 2], full_int_array_16, full_int_array_29, full_int_array_4 + ) + + # pd_op.strided_slice: (-1x14x14x384xf32) <- (-1x28x28x384xf32, 2xi64, 2xi64, 2xi64) + strided_slice_5 = paddle._C_ops.strided_slice( + reshape_37, [1, 2], full_int_array_5, full_int_array_29, full_int_array_4 + ) + + # pd_op.strided_slice: (-1x14x14x384xf32) <- (-1x28x28x384xf32, 2xi64, 2xi64, 2xi64) + strided_slice_6 = paddle._C_ops.strided_slice( + reshape_37, [1, 2], full_int_array_6, full_int_array_29, full_int_array_4 + ) + + # pd_op.strided_slice: (-1x14x14x384xf32) <- (-1x28x28x384xf32, 2xi64, 2xi64, 2xi64) + strided_slice_7 = paddle._C_ops.strided_slice( + reshape_37, [1, 2], full_int_array_18, full_int_array_29, full_int_array_4 + ) + + # pd_op.shape64: (4xi64) <- (-1x28x28x384xf32) + shape64_26 = paddle._C_ops.shape64(reshape_37) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_58 = paddle._C_ops.slice( + shape64_26, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_26 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_35 = [slice_58, full_45, full_45, full_46] + del full_45, full_46, slice_58 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_34 = paddle._C_ops.stack(combine_35, 0) + del combine_35 + + # pd_op.reshape: (-1x28x28x384xf32) <- (-1x28x28x384xf32, 4xi64) + reshape_236 = paddle._C_ops.reshape(reshape_37, stack_34) + del stack_34 + + # builtin.combine: ([-1x14x14x384xf32, -1x14x14x384xf32, -1x14x14x384xf32, -1x14x14x384xf32]) <- (-1x14x14x384xf32, -1x14x14x384xf32, -1x14x14x384xf32, -1x14x14x384xf32) + combine_36 = [ + strided_slice_4, + strided_slice_5, + strided_slice_6, + strided_slice_7, + ] + + # pd_op.concat: (-1x14x14x1536xf32) <- ([-1x14x14x384xf32, -1x14x14x384xf32, -1x14x14x384xf32, -1x14x14x384xf32], 1xi32) + concat_1 = paddle._C_ops.concat(combine_36, full_2) + del combine_36 + + # pd_op.full: (xi64) <- () + full_55 = paddle._C_ops.full( + [], float("1536"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_37 = [slice_57, full_43, full_55] + del slice_57 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_35 = paddle._C_ops.stack(combine_37, 0) + del combine_37 + + # pd_op.reshape: (-1x-1x1536xf32) <- (-1x14x14x1536xf32, 3xi64) + reshape_38 = paddle._C_ops.reshape(concat_1, stack_35) + del stack_35 + + # pd_op.layer_norm: (-1x-1x1536xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x1536xf32, 1536xf32, 1536xf32) + layer_norm_30, layer_norm_31, layer_norm_32 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + reshape_38, parameter_249, parameter_248, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_248, parameter_249 + + # pd_op.matmul: (-1x-1x768xf32) <- (-1x-1x1536xf32, 1536x768xf32) + matmul_21 = paddle._C_ops.matmul(layer_norm_30, parameter_247, False, False) + del parameter_247 + + # pd_op.shape64: (3xi64) <- (-1x-1x768xf32) + shape64_27 = paddle._C_ops.shape64(matmul_21) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_59 = paddle._C_ops.slice( + shape64_27, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_27 + + # pd_op.shape64: (3xi64) <- (-1x-1x768xf32) + shape64_28 = paddle._C_ops.shape64(matmul_21) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_60 = paddle._C_ops.slice( + shape64_28, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + del shape64_28 + + # pd_op.layer_norm: (-1x-1x768xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x768xf32, 768xf32, 768xf32) + layer_norm_33, layer_norm_34, layer_norm_35 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + matmul_21, parameter_246, parameter_245, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_245, parameter_246 + + # pd_op.full: (xi64) <- () + full_56 = paddle._C_ops.full( + [], float("14"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_38 = [slice_59, full_56, full_56, full_44] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_36 = paddle._C_ops.stack(combine_38, 0) + del combine_38 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x-1x768xf32, 4xi64) + reshape_39 = paddle._C_ops.reshape(layer_norm_33, stack_36) + del stack_36 + + # pd_op.shape64: (4xi64) <- (-1x14x14x768xf32) + shape64_29 = paddle._C_ops.shape64(reshape_39) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_61 = paddle._C_ops.slice( + shape64_29, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_29 + + # pd_op.full: (xi64) <- () + full_57 = paddle._C_ops.full( + [], float("2"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_39 = [slice_61, full_57, full_28, full_57, full_28, full_44] + del slice_61 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_37 = paddle._C_ops.stack(combine_39, 0) + del combine_39 + + # pd_op.reshape: (-1x2x7x2x7x768xf32) <- (-1x14x14x768xf32, 6xi64) + reshape_237 = paddle._C_ops.reshape(reshape_39, stack_37) + del stack_37 + + # pd_op.transpose: (-1x2x2x7x7x768xf32) <- (-1x2x7x2x7x768xf32) + transpose_25 = paddle._C_ops.transpose(reshape_237, [0, 1, 3, 2, 4, 5]) + del reshape_237 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_38 = [-1, 7, 7, 768] + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x2x2x7x7x768xf32, 4xi64) + reshape_40 = paddle._C_ops.reshape(transpose_25, full_int_array_38) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_39 = [-1, 49, 768] + + # pd_op.reshape: (-1x49x768xf32) <- (-1x7x7x768xf32, 3xi64) + reshape_41 = paddle._C_ops.reshape(reshape_40, full_int_array_39) + + # pd_op.shape64: (3xi64) <- (-1x49x768xf32) + shape64_30 = paddle._C_ops.shape64(reshape_41) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_62 = paddle._C_ops.slice( + shape64_30, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_30 + + # pd_op.matmul: (-1x49x2304xf32) <- (-1x49x768xf32, 768x2304xf32) + matmul_22 = paddle._C_ops.matmul(reshape_41, parameter_244, False, False) + del parameter_244 + + # pd_op.add: (-1x49x2304xf32) <- (-1x49x2304xf32, 2304xf32) + add_29 = paddle._C_ops.add(matmul_22, parameter_243) + del parameter_243 + + # pd_op.full: (xi64) <- () + full_58 = paddle._C_ops.full( + [], float("24"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_40 = [slice_62, full_29, full_30, full_58, full_32] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_38 = paddle._C_ops.stack(combine_40, 0) + del combine_40 + + # pd_op.reshape: (-1x49x3x24x32xf32) <- (-1x49x2304xf32, 5xi64) + reshape_238 = paddle._C_ops.reshape(add_29, stack_38) + del stack_38 + + # pd_op.transpose: (3x-1x24x49x32xf32) <- (-1x49x3x24x32xf32) + transpose_26 = paddle._C_ops.transpose(reshape_238, [2, 0, 3, 1, 4]) + del reshape_238 + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_63 = paddle._C_ops.slice( + transpose_26, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_64 = paddle._C_ops.slice( + transpose_26, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_4 = paddle._C_ops.slice( + transpose_26, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x24x49x32xf32) <- (-1x24x49x32xf32, 1xf32) + scale_4 = paddle._C_ops.scale(slice_63, full_0, float("0"), True) + del slice_63 + + # pd_op.transpose: (-1x24x32x49xf32) <- (-1x24x49x32xf32) + transpose_27 = paddle._C_ops.transpose(slice_64, [0, 1, 3, 2]) + del slice_64 + + # pd_op.matmul: (-1x24x49x49xf32) <- (-1x24x49x32xf32, -1x24x32x49xf32) + matmul_23 = paddle._C_ops.matmul(scale_4, transpose_27, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_42 = paddle._C_ops.reshape(data_9, full_int_array_12) + del data_9 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_4 = paddle._C_ops.index_select(data_10, reshape_42, 0) + del data_10 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_239 = paddle._C_ops.reshape(index_select_4, full_int_array_13) + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_28 = paddle._C_ops.transpose(reshape_239, [2, 0, 1]) + del reshape_239 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_6 = paddle._C_ops.unsqueeze(transpose_28, full_int_array_7) + + # pd_op.add: (-1x24x49x49xf32) <- (-1x24x49x49xf32, 1x24x49x49xf32) + add_178 = paddle._C_ops.add(matmul_23, unsqueeze_6) + + # pd_op.softmax: (-1x24x49x49xf32) <- (-1x24x49x49xf32) + softmax_4 = paddle._C_ops.softmax(add_178, -1) + del add_178 + + # pd_op.matmul: (-1x24x49x32xf32) <- (-1x24x49x49xf32, -1x24x49x32xf32) + matmul_128 = paddle._C_ops.matmul(softmax_4, slice_4, False, False) + + # pd_op.transpose: (-1x49x24x32xf32) <- (-1x24x49x32xf32) + transpose_29 = paddle._C_ops.transpose(matmul_128, [0, 2, 1, 3]) + del matmul_128 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_41 = [slice_62, full_29, full_44] + del slice_62 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_39 = paddle._C_ops.stack(combine_41, 0) + del combine_41 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x49x24x32xf32, 3xi64) + reshape_43 = paddle._C_ops.reshape(transpose_29, stack_39) + del stack_39 + + # pd_op.matmul: (-1x49x768xf32) <- (-1x49x768xf32, 768x768xf32) + matmul_24 = paddle._C_ops.matmul(reshape_43, parameter_242, False, False) + del parameter_242 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, 768xf32) + add_30 = paddle._C_ops.add(matmul_24, parameter_241) + del parameter_241 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x49x768xf32, 4xi64) + reshape_44 = paddle._C_ops.reshape(add_30, full_int_array_38) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_40 = [-1, 2, 2, 7, 7, 768] + + # pd_op.reshape: (-1x2x2x7x7x768xf32) <- (-1x7x7x768xf32, 6xi64) + reshape_240 = paddle._C_ops.reshape(reshape_44, full_int_array_40) + + # pd_op.transpose: (-1x2x7x2x7x768xf32) <- (-1x2x2x7x7x768xf32) + transpose_30 = paddle._C_ops.transpose(reshape_240, [0, 1, 3, 2, 4, 5]) + del reshape_240 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_41 = [-1, 14, 14, 768] + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x2x7x2x7x768xf32, 4xi64) + reshape_45 = paddle._C_ops.reshape(transpose_30, full_int_array_41) + + # pd_op.full: (xi64) <- () + full_59 = paddle._C_ops.full( + [], float("196"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_42 = [slice_59, full_59, full_44] + del slice_59 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_40 = paddle._C_ops.stack(combine_42, 0) + del combine_42 + + # pd_op.reshape: (-1x196x768xf32) <- (-1x14x14x768xf32, 3xi64) + reshape_46 = paddle._C_ops.reshape(reshape_45, stack_40) + del stack_40 + + # pd_op.full: (xf32) <- () + full_5 = paddle._C_ops.full( + [], + float("0.982609"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_64 = full_5 + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_31 = paddle._C_ops.shape64(reshape_46) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_65 = paddle._C_ops.slice( + shape64_31, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_31 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_43 = [slice_65, full_40, full_40] + del slice_65 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_41 = paddle._C_ops.stack(combine_43, 0) + del combine_43 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_6 = paddle._C_ops.uniform( + stack_41, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_41 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_179 = paddle._C_ops.add(full_5, uniform_6) + del uniform_6 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_6 = paddle._C_ops.floor(add_179) + del add_179 + + # pd_op.divide: (-1x196x768xf32) <- (-1x196x768xf32, xf32) + divide_6 = paddle._C_ops.divide(reshape_46, full_5) + + # pd_op.multiply: (-1x196x768xf32) <- (-1x196x768xf32, -1x1x1xf32) + multiply_6 = paddle._C_ops.multiply(divide_6, floor_6) + + # pd_op.add: (-1x196x768xf32) <- (-1x-1x768xf32, -1x196x768xf32) + add_31 = paddle._C_ops.add(matmul_21, multiply_6) + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_36, layer_norm_37, layer_norm_38 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_31, parameter_240, parameter_239, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_239, parameter_240 + + # pd_op.matmul: (-1x196x3072xf32) <- (-1x196x768xf32, 768x3072xf32) + matmul_25 = paddle._C_ops.matmul(layer_norm_36, parameter_238, False, False) + del parameter_238 + + # pd_op.add: (-1x196x3072xf32) <- (-1x196x3072xf32, 3072xf32) + add_32 = paddle._C_ops.add(matmul_25, parameter_237) + del parameter_237 + + # pd_op.gelu: (-1x196x3072xf32) <- (-1x196x3072xf32) + gelu_4 = paddle._C_ops.gelu(add_32, False) + + # pd_op.matmul: (-1x196x768xf32) <- (-1x196x3072xf32, 3072x768xf32) + matmul_26 = paddle._C_ops.matmul(gelu_4, parameter_236, False, False) + del parameter_236 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, 768xf32) + add_33 = paddle._C_ops.add(matmul_26, parameter_235) + del parameter_235 + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_32 = paddle._C_ops.shape64(add_33) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_66 = paddle._C_ops.slice( + shape64_32, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_32 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_44 = [slice_66, full_40, full_40] + del slice_66 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_42 = paddle._C_ops.stack(combine_44, 0) + del combine_44 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_7 = paddle._C_ops.uniform( + stack_42, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_42 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_180 = paddle._C_ops.add(full_5, uniform_7) + del uniform_7 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_7 = paddle._C_ops.floor(add_180) + del add_180 + + # pd_op.divide: (-1x196x768xf32) <- (-1x196x768xf32, xf32) + divide_7 = paddle._C_ops.divide(add_33, full_5) + + # pd_op.multiply: (-1x196x768xf32) <- (-1x196x768xf32, -1x1x1xf32) + multiply_7 = paddle._C_ops.multiply(divide_7, floor_7) + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_34 = paddle._C_ops.add(add_31, multiply_7) + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_33 = paddle._C_ops.shape64(add_34) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_67 = paddle._C_ops.slice( + shape64_33, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_33 + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_39, layer_norm_40, layer_norm_41 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_34, parameter_234, parameter_233, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_233, parameter_234 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_45 = [slice_67, full_56, full_56, full_44] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_43 = paddle._C_ops.stack(combine_45, 0) + del combine_45 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x196x768xf32, 4xi64) + reshape_47 = paddle._C_ops.reshape(layer_norm_39, stack_43) + del stack_43 + + # pd_op.shape64: (4xi64) <- (-1x14x14x768xf32) + shape64_34 = paddle._C_ops.shape64(reshape_47) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_68 = paddle._C_ops.slice( + shape64_34, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_34 + + # pd_op.roll: (-1x14x14x768xf32) <- (-1x14x14x768xf32, 2xi64) + roll_4 = paddle._C_ops.roll(reshape_47, full_int_array_2, [1, 2]) + + # pd_op.shape64: (4xi64) <- (-1x14x14x768xf32) + shape64_35 = paddle._C_ops.shape64(roll_4) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_69 = paddle._C_ops.slice( + shape64_35, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_35 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_46 = [slice_69, full_57, full_28, full_57, full_28, full_44] + del slice_69 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_44 = paddle._C_ops.stack(combine_46, 0) + del combine_46 + + # pd_op.reshape: (-1x2x7x2x7x768xf32) <- (-1x14x14x768xf32, 6xi64) + reshape_241 = paddle._C_ops.reshape(roll_4, stack_44) + del stack_44 + + # pd_op.transpose: (-1x2x2x7x7x768xf32) <- (-1x2x7x2x7x768xf32) + transpose_31 = paddle._C_ops.transpose(reshape_241, [0, 1, 3, 2, 4, 5]) + del reshape_241 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x2x2x7x7x768xf32, 4xi64) + reshape_48 = paddle._C_ops.reshape(transpose_31, full_int_array_38) + + # pd_op.reshape: (-1x49x768xf32) <- (-1x7x7x768xf32, 3xi64) + reshape_49 = paddle._C_ops.reshape(reshape_48, full_int_array_39) + + # pd_op.full: (1x14x14x1xf32) <- () + full_60 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__28 = paddle._C_ops.set_value_( + full_60, + full_int_array_16, + full_int_array_17, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_60 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__29 = paddle._C_ops.set_value_( + set_value__28, + full_int_array_19, + full_int_array_20, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__28 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__30 = paddle._C_ops.set_value_( + set_value__29, + full_int_array_21, + full_int_array_22, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__29 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__31 = paddle._C_ops.set_value_( + set_value__30, + full_int_array_23, + full_int_array_24, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__30 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__32 = paddle._C_ops.set_value_( + set_value__31, + full_int_array_17, + full_int_array_2, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__31 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__33 = paddle._C_ops.set_value_( + set_value__32, + full_int_array_20, + full_int_array_25, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__32 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__34 = paddle._C_ops.set_value_( + set_value__33, + full_int_array_26, + full_int_array_27, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__33 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__35 = paddle._C_ops.set_value_( + set_value__34, + full_int_array_24, + full_int_array_28, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__34 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__2 = paddle._C_ops.set_value_( + set_value__35, + full_int_array_2, + full_int_array_29, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__35 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_42 = [1, 2, 7, 2, 7, 1] + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_242 = paddle._C_ops.reshape(set_value__2, full_int_array_42) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_148 = paddle._C_ops.transpose(reshape_242, [0, 1, 3, 2, 4, 5]) + del reshape_242 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_243 = paddle._C_ops.reshape(transpose_148, full_int_array_31) + del transpose_148 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_244 = paddle._C_ops.reshape(reshape_243, full_int_array_32) + del reshape_243 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_43 = paddle._C_ops.unsqueeze(reshape_244, full_int_array_8) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_44 = paddle._C_ops.unsqueeze(reshape_244, full_int_array_0) + del reshape_244 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_2 = paddle._C_ops.subtract(unsqueeze_43, unsqueeze_44) + del unsqueeze_43, unsqueeze_44 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_2 = paddle._C_ops.not_equal(subtract_2, full_35) + + # pd_op.full: (4x49x49xf32) <- () + full_61 = paddle._C_ops.full( + [4, 49, 49], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_4 = paddle._C_ops.where(not_equal_2, full_61, subtract_2) + del not_equal_2, subtract_2 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_2 = paddle._C_ops.equal(where_4, full_35) + + # pd_op.full: (4x49x49xf32) <- () + full_62 = paddle._C_ops.full( + [4, 49, 49], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_5 = paddle._C_ops.where(equal_2, full_62, where_4) + del equal_2, where_4 + + # pd_op.shape64: (3xi64) <- (-1x49x768xf32) + shape64_36 = paddle._C_ops.shape64(reshape_49) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_70 = paddle._C_ops.slice( + shape64_36, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_36 + + # pd_op.matmul: (-1x49x2304xf32) <- (-1x49x768xf32, 768x2304xf32) + matmul_27 = paddle._C_ops.matmul(reshape_49, parameter_232, False, False) + del parameter_232 + + # pd_op.add: (-1x49x2304xf32) <- (-1x49x2304xf32, 2304xf32) + add_35 = paddle._C_ops.add(matmul_27, parameter_231) + del parameter_231 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_47 = [slice_70, full_29, full_30, full_58, full_32] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_45 = paddle._C_ops.stack(combine_47, 0) + del combine_47 + + # pd_op.reshape: (-1x49x3x24x32xf32) <- (-1x49x2304xf32, 5xi64) + reshape_245 = paddle._C_ops.reshape(add_35, stack_45) + del stack_45 + + # pd_op.transpose: (3x-1x24x49x32xf32) <- (-1x49x3x24x32xf32) + transpose_32 = paddle._C_ops.transpose(reshape_245, [2, 0, 3, 1, 4]) + del reshape_245 + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_71 = paddle._C_ops.slice( + transpose_32, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_72 = paddle._C_ops.slice( + transpose_32, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_5 = paddle._C_ops.slice( + transpose_32, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x24x49x32xf32) <- (-1x24x49x32xf32, 1xf32) + scale_5 = paddle._C_ops.scale(slice_71, full_0, float("0"), True) + del slice_71 + + # pd_op.transpose: (-1x24x32x49xf32) <- (-1x24x49x32xf32) + transpose_33 = paddle._C_ops.transpose(slice_72, [0, 1, 3, 2]) + del slice_72 + + # pd_op.matmul: (-1x24x49x49xf32) <- (-1x24x49x32xf32, -1x24x32x49xf32) + matmul_28 = paddle._C_ops.matmul(scale_5, transpose_33, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_50 = paddle._C_ops.reshape(data_11, full_int_array_12) + del data_11 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_5 = paddle._C_ops.index_select(data_12, reshape_50, 0) + del data_12 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_246 = paddle._C_ops.reshape(index_select_5, full_int_array_13) + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_34 = paddle._C_ops.transpose(reshape_246, [2, 0, 1]) + del reshape_246 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_7 = paddle._C_ops.unsqueeze(transpose_34, full_int_array_7) + + # pd_op.add: (-1x24x49x49xf32) <- (-1x24x49x49xf32, 1x24x49x49xf32) + add_36 = paddle._C_ops.add(matmul_28, unsqueeze_7) + + # pd_op.full: (xi64) <- () + full_63 = paddle._C_ops.full( + [], float("4"), paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_2 = paddle._C_ops.floor_divide(slice_70, full_63) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_48 = [floor_divide_2, full_47, full_58, full_29, full_29] + del floor_divide_2 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_46 = paddle._C_ops.stack(combine_48, 0) + del combine_48 + + # pd_op.reshape: (-1x4x24x49x49xf32) <- (-1x24x49x49xf32, 5xi64) + reshape_51 = paddle._C_ops.reshape(add_36, stack_46) + del stack_46 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_45 = paddle._C_ops.unsqueeze(where_5, full_int_array_8) + del where_5 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_8 = paddle._C_ops.unsqueeze(unsqueeze_45, full_int_array_7) + del unsqueeze_45 + + # pd_op.add: (-1x4x24x49x49xf32) <- (-1x4x24x49x49xf32, 1x4x1x49x49xf32) + add_37 = paddle._C_ops.add(reshape_51, unsqueeze_8) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_49 = [slice_70, full_58, full_29, full_29] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_47 = paddle._C_ops.stack(combine_49, 0) + del combine_49 + + # pd_op.reshape: (-1x24x49x49xf32) <- (-1x4x24x49x49xf32, 4xi64) + reshape_247 = paddle._C_ops.reshape(add_37, stack_47) + del stack_47 + + # pd_op.softmax: (-1x24x49x49xf32) <- (-1x24x49x49xf32) + softmax_5 = paddle._C_ops.softmax(reshape_247, -1) + del reshape_247 + + # pd_op.matmul: (-1x24x49x32xf32) <- (-1x24x49x49xf32, -1x24x49x32xf32) + matmul_129 = paddle._C_ops.matmul(softmax_5, slice_5, False, False) + + # pd_op.transpose: (-1x49x24x32xf32) <- (-1x24x49x32xf32) + transpose_35 = paddle._C_ops.transpose(matmul_129, [0, 2, 1, 3]) + del matmul_129 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_50 = [slice_70, full_29, full_44] + del slice_70 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_48 = paddle._C_ops.stack(combine_50, 0) + del combine_50 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x49x24x32xf32, 3xi64) + reshape_52 = paddle._C_ops.reshape(transpose_35, stack_48) + del stack_48 + + # pd_op.matmul: (-1x49x768xf32) <- (-1x49x768xf32, 768x768xf32) + matmul_29 = paddle._C_ops.matmul(reshape_52, parameter_230, False, False) + del parameter_230 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, 768xf32) + add_38 = paddle._C_ops.add(matmul_29, parameter_229) + del parameter_229 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x49x768xf32, 4xi64) + reshape_53 = paddle._C_ops.reshape(add_38, full_int_array_38) + + # pd_op.reshape: (-1x2x2x7x7x768xf32) <- (-1x7x7x768xf32, 6xi64) + reshape_248 = paddle._C_ops.reshape(reshape_53, full_int_array_40) + + # pd_op.transpose: (-1x2x7x2x7x768xf32) <- (-1x2x2x7x7x768xf32) + transpose_36 = paddle._C_ops.transpose(reshape_248, [0, 1, 3, 2, 4, 5]) + del reshape_248 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x2x7x2x7x768xf32, 4xi64) + reshape_54 = paddle._C_ops.reshape(transpose_36, full_int_array_41) + + # pd_op.roll: (-1x14x14x768xf32) <- (-1x14x14x768xf32, 2xi64) + roll_5 = paddle._C_ops.roll(reshape_54, full_int_array_3, [1, 2]) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_51 = [slice_67, full_59, full_44] + del slice_67 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_49 = paddle._C_ops.stack(combine_51, 0) + del combine_51 + + # pd_op.reshape: (-1x196x768xf32) <- (-1x14x14x768xf32, 3xi64) + reshape_55 = paddle._C_ops.reshape(roll_5, stack_49) + del stack_49 + + # pd_op.full: (xf32) <- () + full_6 = paddle._C_ops.full( + [], + float("0.978261"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_75 = full_6 + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_37 = paddle._C_ops.shape64(reshape_55) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_73 = paddle._C_ops.slice( + shape64_37, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_37 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_52 = [slice_73, full_40, full_40] + del slice_73 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_50 = paddle._C_ops.stack(combine_52, 0) + del combine_52 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_8 = paddle._C_ops.uniform( + stack_50, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_50 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_181 = paddle._C_ops.add(full_6, uniform_8) + del uniform_8 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_8 = paddle._C_ops.floor(add_181) + del add_181 + + # pd_op.divide: (-1x196x768xf32) <- (-1x196x768xf32, xf32) + divide_8 = paddle._C_ops.divide(reshape_55, full_6) + + # pd_op.multiply: (-1x196x768xf32) <- (-1x196x768xf32, -1x1x1xf32) + multiply_8 = paddle._C_ops.multiply(divide_8, floor_8) + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_39 = paddle._C_ops.add(add_34, multiply_8) + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_42, layer_norm_43, layer_norm_44 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_39, parameter_228, parameter_227, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_227, parameter_228 + + # pd_op.matmul: (-1x196x3072xf32) <- (-1x196x768xf32, 768x3072xf32) + matmul_30 = paddle._C_ops.matmul(layer_norm_42, parameter_226, False, False) + del parameter_226 + + # pd_op.add: (-1x196x3072xf32) <- (-1x196x3072xf32, 3072xf32) + add_40 = paddle._C_ops.add(matmul_30, parameter_225) + del parameter_225 + + # pd_op.gelu: (-1x196x3072xf32) <- (-1x196x3072xf32) + gelu_5 = paddle._C_ops.gelu(add_40, False) + + # pd_op.matmul: (-1x196x768xf32) <- (-1x196x3072xf32, 3072x768xf32) + matmul_31 = paddle._C_ops.matmul(gelu_5, parameter_224, False, False) + del parameter_224 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, 768xf32) + add_41 = paddle._C_ops.add(matmul_31, parameter_223) + del parameter_223 + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_38 = paddle._C_ops.shape64(add_41) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_74 = paddle._C_ops.slice( + shape64_38, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_38 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_53 = [slice_74, full_40, full_40] + del slice_74 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_51 = paddle._C_ops.stack(combine_53, 0) + del combine_53 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_9 = paddle._C_ops.uniform( + stack_51, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_51 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_182 = paddle._C_ops.add(full_6, uniform_9) + del uniform_9 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_9 = paddle._C_ops.floor(add_182) + del add_182 + + # pd_op.divide: (-1x196x768xf32) <- (-1x196x768xf32, xf32) + divide_9 = paddle._C_ops.divide(add_41, full_6) + + # pd_op.multiply: (-1x196x768xf32) <- (-1x196x768xf32, -1x1x1xf32) + multiply_9 = paddle._C_ops.multiply(divide_9, floor_9) + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_42 = paddle._C_ops.add(add_39, multiply_9) + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_39 = paddle._C_ops.shape64(add_42) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_75 = paddle._C_ops.slice( + shape64_39, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_39 + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_45, layer_norm_46, layer_norm_47 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_42, parameter_222, parameter_221, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_221, parameter_222 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_54 = [slice_75, full_56, full_56, full_44] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_52 = paddle._C_ops.stack(combine_54, 0) + del combine_54 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x196x768xf32, 4xi64) + reshape_56 = paddle._C_ops.reshape(layer_norm_45, stack_52) + del stack_52 + + # pd_op.shape64: (4xi64) <- (-1x14x14x768xf32) + shape64_40 = paddle._C_ops.shape64(reshape_56) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_76 = paddle._C_ops.slice( + shape64_40, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_40 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_55 = [slice_76, full_57, full_28, full_57, full_28, full_44] + del slice_76 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_53 = paddle._C_ops.stack(combine_55, 0) + del combine_55 + + # pd_op.reshape: (-1x2x7x2x7x768xf32) <- (-1x14x14x768xf32, 6xi64) + reshape_249 = paddle._C_ops.reshape(reshape_56, stack_53) + del stack_53 + + # pd_op.transpose: (-1x2x2x7x7x768xf32) <- (-1x2x7x2x7x768xf32) + transpose_37 = paddle._C_ops.transpose(reshape_249, [0, 1, 3, 2, 4, 5]) + del reshape_249 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x2x2x7x7x768xf32, 4xi64) + reshape_57 = paddle._C_ops.reshape(transpose_37, full_int_array_38) + + # pd_op.reshape: (-1x49x768xf32) <- (-1x7x7x768xf32, 3xi64) + reshape_58 = paddle._C_ops.reshape(reshape_57, full_int_array_39) + + # pd_op.shape64: (3xi64) <- (-1x49x768xf32) + shape64_41 = paddle._C_ops.shape64(reshape_58) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_77 = paddle._C_ops.slice( + shape64_41, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_41 + + # pd_op.matmul: (-1x49x2304xf32) <- (-1x49x768xf32, 768x2304xf32) + matmul_32 = paddle._C_ops.matmul(reshape_58, parameter_220, False, False) + del parameter_220 + + # pd_op.add: (-1x49x2304xf32) <- (-1x49x2304xf32, 2304xf32) + add_43 = paddle._C_ops.add(matmul_32, parameter_219) + del parameter_219 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_56 = [slice_77, full_29, full_30, full_58, full_32] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_54 = paddle._C_ops.stack(combine_56, 0) + del combine_56 + + # pd_op.reshape: (-1x49x3x24x32xf32) <- (-1x49x2304xf32, 5xi64) + reshape_250 = paddle._C_ops.reshape(add_43, stack_54) + del stack_54 + + # pd_op.transpose: (3x-1x24x49x32xf32) <- (-1x49x3x24x32xf32) + transpose_38 = paddle._C_ops.transpose(reshape_250, [2, 0, 3, 1, 4]) + del reshape_250 + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_78 = paddle._C_ops.slice( + transpose_38, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_79 = paddle._C_ops.slice( + transpose_38, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_6 = paddle._C_ops.slice( + transpose_38, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x24x49x32xf32) <- (-1x24x49x32xf32, 1xf32) + scale_6 = paddle._C_ops.scale(slice_78, full_0, float("0"), True) + del slice_78 + + # pd_op.transpose: (-1x24x32x49xf32) <- (-1x24x49x32xf32) + transpose_39 = paddle._C_ops.transpose(slice_79, [0, 1, 3, 2]) + del slice_79 + + # pd_op.matmul: (-1x24x49x49xf32) <- (-1x24x49x32xf32, -1x24x32x49xf32) + matmul_33 = paddle._C_ops.matmul(scale_6, transpose_39, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_59 = paddle._C_ops.reshape(data_13, full_int_array_12) + del data_13 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_6 = paddle._C_ops.index_select(data_14, reshape_59, 0) + del data_14 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_251 = paddle._C_ops.reshape(index_select_6, full_int_array_13) + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_40 = paddle._C_ops.transpose(reshape_251, [2, 0, 1]) + del reshape_251 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_9 = paddle._C_ops.unsqueeze(transpose_40, full_int_array_7) + + # pd_op.add: (-1x24x49x49xf32) <- (-1x24x49x49xf32, 1x24x49x49xf32) + add_183 = paddle._C_ops.add(matmul_33, unsqueeze_9) + + # pd_op.softmax: (-1x24x49x49xf32) <- (-1x24x49x49xf32) + softmax_6 = paddle._C_ops.softmax(add_183, -1) + del add_183 + + # pd_op.matmul: (-1x24x49x32xf32) <- (-1x24x49x49xf32, -1x24x49x32xf32) + matmul_130 = paddle._C_ops.matmul(softmax_6, slice_6, False, False) + + # pd_op.transpose: (-1x49x24x32xf32) <- (-1x24x49x32xf32) + transpose_41 = paddle._C_ops.transpose(matmul_130, [0, 2, 1, 3]) + del matmul_130 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_57 = [slice_77, full_29, full_44] + del slice_77 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_55 = paddle._C_ops.stack(combine_57, 0) + del combine_57 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x49x24x32xf32, 3xi64) + reshape_60 = paddle._C_ops.reshape(transpose_41, stack_55) + del stack_55 + + # pd_op.matmul: (-1x49x768xf32) <- (-1x49x768xf32, 768x768xf32) + matmul_34 = paddle._C_ops.matmul(reshape_60, parameter_218, False, False) + del parameter_218 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, 768xf32) + add_44 = paddle._C_ops.add(matmul_34, parameter_217) + del parameter_217 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x49x768xf32, 4xi64) + reshape_61 = paddle._C_ops.reshape(add_44, full_int_array_38) + + # pd_op.reshape: (-1x2x2x7x7x768xf32) <- (-1x7x7x768xf32, 6xi64) + reshape_252 = paddle._C_ops.reshape(reshape_61, full_int_array_40) + + # pd_op.transpose: (-1x2x7x2x7x768xf32) <- (-1x2x2x7x7x768xf32) + transpose_42 = paddle._C_ops.transpose(reshape_252, [0, 1, 3, 2, 4, 5]) + del reshape_252 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x2x7x2x7x768xf32, 4xi64) + reshape_62 = paddle._C_ops.reshape(transpose_42, full_int_array_41) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_58 = [slice_75, full_59, full_44] + del slice_75 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_56 = paddle._C_ops.stack(combine_58, 0) + del combine_58 + + # pd_op.reshape: (-1x196x768xf32) <- (-1x14x14x768xf32, 3xi64) + reshape_63 = paddle._C_ops.reshape(reshape_62, stack_56) + del stack_56 + + # pd_op.full: (xf32) <- () + full_7 = paddle._C_ops.full( + [], + float("0.973913"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_84 = full_7 + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_42 = paddle._C_ops.shape64(reshape_63) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_80 = paddle._C_ops.slice( + shape64_42, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_42 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_59 = [slice_80, full_40, full_40] + del slice_80 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_57 = paddle._C_ops.stack(combine_59, 0) + del combine_59 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_10 = paddle._C_ops.uniform( + stack_57, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_57 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_184 = paddle._C_ops.add(full_7, uniform_10) + del uniform_10 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_10 = paddle._C_ops.floor(add_184) + del add_184 + + # pd_op.divide: (-1x196x768xf32) <- (-1x196x768xf32, xf32) + divide_10 = paddle._C_ops.divide(reshape_63, full_7) + + # pd_op.multiply: (-1x196x768xf32) <- (-1x196x768xf32, -1x1x1xf32) + multiply_10 = paddle._C_ops.multiply(divide_10, floor_10) + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_45 = paddle._C_ops.add(add_42, multiply_10) + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_48, layer_norm_49, layer_norm_50 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_45, parameter_216, parameter_215, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_215, parameter_216 + + # pd_op.matmul: (-1x196x3072xf32) <- (-1x196x768xf32, 768x3072xf32) + matmul_35 = paddle._C_ops.matmul(layer_norm_48, parameter_214, False, False) + del parameter_214 + + # pd_op.add: (-1x196x3072xf32) <- (-1x196x3072xf32, 3072xf32) + add_46 = paddle._C_ops.add(matmul_35, parameter_213) + del parameter_213 + + # pd_op.gelu: (-1x196x3072xf32) <- (-1x196x3072xf32) + gelu_6 = paddle._C_ops.gelu(add_46, False) + + # pd_op.matmul: (-1x196x768xf32) <- (-1x196x3072xf32, 3072x768xf32) + matmul_36 = paddle._C_ops.matmul(gelu_6, parameter_212, False, False) + del parameter_212 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, 768xf32) + add_47 = paddle._C_ops.add(matmul_36, parameter_211) + del parameter_211 + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_43 = paddle._C_ops.shape64(add_47) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_81 = paddle._C_ops.slice( + shape64_43, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_43 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_60 = [slice_81, full_40, full_40] + del slice_81 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_58 = paddle._C_ops.stack(combine_60, 0) + del combine_60 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_11 = paddle._C_ops.uniform( + stack_58, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_58 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_185 = paddle._C_ops.add(full_7, uniform_11) + del uniform_11 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_11 = paddle._C_ops.floor(add_185) + del add_185 + + # pd_op.divide: (-1x196x768xf32) <- (-1x196x768xf32, xf32) + divide_11 = paddle._C_ops.divide(add_47, full_7) + + # pd_op.multiply: (-1x196x768xf32) <- (-1x196x768xf32, -1x1x1xf32) + multiply_11 = paddle._C_ops.multiply(divide_11, floor_11) + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_48 = paddle._C_ops.add(add_45, multiply_11) + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_44 = paddle._C_ops.shape64(add_48) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_82 = paddle._C_ops.slice( + shape64_44, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_44 + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_51, layer_norm_52, layer_norm_53 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_48, parameter_210, parameter_209, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_209, parameter_210 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_61 = [slice_82, full_56, full_56, full_44] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_59 = paddle._C_ops.stack(combine_61, 0) + del combine_61 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x196x768xf32, 4xi64) + reshape_64 = paddle._C_ops.reshape(layer_norm_51, stack_59) + del stack_59 + + # pd_op.shape64: (4xi64) <- (-1x14x14x768xf32) + shape64_45 = paddle._C_ops.shape64(reshape_64) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_83 = paddle._C_ops.slice( + shape64_45, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_45 + + # pd_op.roll: (-1x14x14x768xf32) <- (-1x14x14x768xf32, 2xi64) + roll_6 = paddle._C_ops.roll(reshape_64, full_int_array_2, [1, 2]) + + # pd_op.shape64: (4xi64) <- (-1x14x14x768xf32) + shape64_46 = paddle._C_ops.shape64(roll_6) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_84 = paddle._C_ops.slice( + shape64_46, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_46 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_62 = [slice_84, full_57, full_28, full_57, full_28, full_44] + del slice_84 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_60 = paddle._C_ops.stack(combine_62, 0) + del combine_62 + + # pd_op.reshape: (-1x2x7x2x7x768xf32) <- (-1x14x14x768xf32, 6xi64) + reshape_253 = paddle._C_ops.reshape(roll_6, stack_60) + del stack_60 + + # pd_op.transpose: (-1x2x2x7x7x768xf32) <- (-1x2x7x2x7x768xf32) + transpose_43 = paddle._C_ops.transpose(reshape_253, [0, 1, 3, 2, 4, 5]) + del reshape_253 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x2x2x7x7x768xf32, 4xi64) + reshape_65 = paddle._C_ops.reshape(transpose_43, full_int_array_38) + + # pd_op.reshape: (-1x49x768xf32) <- (-1x7x7x768xf32, 3xi64) + reshape_66 = paddle._C_ops.reshape(reshape_65, full_int_array_39) + + # pd_op.full: (1x14x14x1xf32) <- () + full_64 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__36 = paddle._C_ops.set_value_( + full_64, + full_int_array_16, + full_int_array_17, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_64 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__37 = paddle._C_ops.set_value_( + set_value__36, + full_int_array_19, + full_int_array_20, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__36 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__38 = paddle._C_ops.set_value_( + set_value__37, + full_int_array_21, + full_int_array_22, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__37 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__39 = paddle._C_ops.set_value_( + set_value__38, + full_int_array_23, + full_int_array_24, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__38 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__40 = paddle._C_ops.set_value_( + set_value__39, + full_int_array_17, + full_int_array_2, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__39 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__41 = paddle._C_ops.set_value_( + set_value__40, + full_int_array_20, + full_int_array_25, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__40 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__42 = paddle._C_ops.set_value_( + set_value__41, + full_int_array_26, + full_int_array_27, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__41 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__43 = paddle._C_ops.set_value_( + set_value__42, + full_int_array_24, + full_int_array_28, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__42 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__3 = paddle._C_ops.set_value_( + set_value__43, + full_int_array_2, + full_int_array_29, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__43 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_254 = paddle._C_ops.reshape(set_value__3, full_int_array_42) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_149 = paddle._C_ops.transpose(reshape_254, [0, 1, 3, 2, 4, 5]) + del reshape_254 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_255 = paddle._C_ops.reshape(transpose_149, full_int_array_31) + del transpose_149 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_256 = paddle._C_ops.reshape(reshape_255, full_int_array_32) + del reshape_255 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_46 = paddle._C_ops.unsqueeze(reshape_256, full_int_array_8) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_47 = paddle._C_ops.unsqueeze(reshape_256, full_int_array_0) + del reshape_256 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_3 = paddle._C_ops.subtract(unsqueeze_46, unsqueeze_47) + del unsqueeze_46, unsqueeze_47 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_3 = paddle._C_ops.not_equal(subtract_3, full_35) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_6 = paddle._C_ops.where(not_equal_3, full_61, subtract_3) + del not_equal_3, subtract_3 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_3 = paddle._C_ops.equal(where_6, full_35) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_7 = paddle._C_ops.where(equal_3, full_62, where_6) + del equal_3, where_6 + + # pd_op.shape64: (3xi64) <- (-1x49x768xf32) + shape64_47 = paddle._C_ops.shape64(reshape_66) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_85 = paddle._C_ops.slice( + shape64_47, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_47 + + # pd_op.matmul: (-1x49x2304xf32) <- (-1x49x768xf32, 768x2304xf32) + matmul_37 = paddle._C_ops.matmul(reshape_66, parameter_208, False, False) + del parameter_208 + + # pd_op.add: (-1x49x2304xf32) <- (-1x49x2304xf32, 2304xf32) + add_49 = paddle._C_ops.add(matmul_37, parameter_207) + del parameter_207 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_63 = [slice_85, full_29, full_30, full_58, full_32] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_61 = paddle._C_ops.stack(combine_63, 0) + del combine_63 + + # pd_op.reshape: (-1x49x3x24x32xf32) <- (-1x49x2304xf32, 5xi64) + reshape_257 = paddle._C_ops.reshape(add_49, stack_61) + del stack_61 + + # pd_op.transpose: (3x-1x24x49x32xf32) <- (-1x49x3x24x32xf32) + transpose_44 = paddle._C_ops.transpose(reshape_257, [2, 0, 3, 1, 4]) + del reshape_257 + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_86 = paddle._C_ops.slice( + transpose_44, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_87 = paddle._C_ops.slice( + transpose_44, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_7 = paddle._C_ops.slice( + transpose_44, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x24x49x32xf32) <- (-1x24x49x32xf32, 1xf32) + scale_7 = paddle._C_ops.scale(slice_86, full_0, float("0"), True) + del slice_86 + + # pd_op.transpose: (-1x24x32x49xf32) <- (-1x24x49x32xf32) + transpose_45 = paddle._C_ops.transpose(slice_87, [0, 1, 3, 2]) + del slice_87 + + # pd_op.matmul: (-1x24x49x49xf32) <- (-1x24x49x32xf32, -1x24x32x49xf32) + matmul_38 = paddle._C_ops.matmul(scale_7, transpose_45, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_67 = paddle._C_ops.reshape(data_15, full_int_array_12) + del data_15 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_7 = paddle._C_ops.index_select(data_16, reshape_67, 0) + del data_16 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_258 = paddle._C_ops.reshape(index_select_7, full_int_array_13) + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_46 = paddle._C_ops.transpose(reshape_258, [2, 0, 1]) + del reshape_258 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_10 = paddle._C_ops.unsqueeze(transpose_46, full_int_array_7) + + # pd_op.add: (-1x24x49x49xf32) <- (-1x24x49x49xf32, 1x24x49x49xf32) + add_50 = paddle._C_ops.add(matmul_38, unsqueeze_10) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_3 = paddle._C_ops.floor_divide(slice_85, full_63) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_64 = [floor_divide_3, full_47, full_58, full_29, full_29] + del floor_divide_3 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_62 = paddle._C_ops.stack(combine_64, 0) + del combine_64 + + # pd_op.reshape: (-1x4x24x49x49xf32) <- (-1x24x49x49xf32, 5xi64) + reshape_68 = paddle._C_ops.reshape(add_50, stack_62) + del stack_62 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_48 = paddle._C_ops.unsqueeze(where_7, full_int_array_8) + del where_7 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_11 = paddle._C_ops.unsqueeze(unsqueeze_48, full_int_array_7) + del unsqueeze_48 + + # pd_op.add: (-1x4x24x49x49xf32) <- (-1x4x24x49x49xf32, 1x4x1x49x49xf32) + add_51 = paddle._C_ops.add(reshape_68, unsqueeze_11) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_65 = [slice_85, full_58, full_29, full_29] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_63 = paddle._C_ops.stack(combine_65, 0) + del combine_65 + + # pd_op.reshape: (-1x24x49x49xf32) <- (-1x4x24x49x49xf32, 4xi64) + reshape_259 = paddle._C_ops.reshape(add_51, stack_63) + del stack_63 + + # pd_op.softmax: (-1x24x49x49xf32) <- (-1x24x49x49xf32) + softmax_7 = paddle._C_ops.softmax(reshape_259, -1) + del reshape_259 + + # pd_op.matmul: (-1x24x49x32xf32) <- (-1x24x49x49xf32, -1x24x49x32xf32) + matmul_131 = paddle._C_ops.matmul(softmax_7, slice_7, False, False) + + # pd_op.transpose: (-1x49x24x32xf32) <- (-1x24x49x32xf32) + transpose_47 = paddle._C_ops.transpose(matmul_131, [0, 2, 1, 3]) + del matmul_131 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_66 = [slice_85, full_29, full_44] + del slice_85 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_64 = paddle._C_ops.stack(combine_66, 0) + del combine_66 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x49x24x32xf32, 3xi64) + reshape_69 = paddle._C_ops.reshape(transpose_47, stack_64) + del stack_64 + + # pd_op.matmul: (-1x49x768xf32) <- (-1x49x768xf32, 768x768xf32) + matmul_39 = paddle._C_ops.matmul(reshape_69, parameter_206, False, False) + del parameter_206 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, 768xf32) + add_52 = paddle._C_ops.add(matmul_39, parameter_205) + del parameter_205 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x49x768xf32, 4xi64) + reshape_70 = paddle._C_ops.reshape(add_52, full_int_array_38) + + # pd_op.reshape: (-1x2x2x7x7x768xf32) <- (-1x7x7x768xf32, 6xi64) + reshape_260 = paddle._C_ops.reshape(reshape_70, full_int_array_40) + + # pd_op.transpose: (-1x2x7x2x7x768xf32) <- (-1x2x2x7x7x768xf32) + transpose_48 = paddle._C_ops.transpose(reshape_260, [0, 1, 3, 2, 4, 5]) + del reshape_260 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x2x7x2x7x768xf32, 4xi64) + reshape_71 = paddle._C_ops.reshape(transpose_48, full_int_array_41) + + # pd_op.roll: (-1x14x14x768xf32) <- (-1x14x14x768xf32, 2xi64) + roll_7 = paddle._C_ops.roll(reshape_71, full_int_array_3, [1, 2]) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_67 = [slice_82, full_59, full_44] + del slice_82 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_65 = paddle._C_ops.stack(combine_67, 0) + del combine_67 + + # pd_op.reshape: (-1x196x768xf32) <- (-1x14x14x768xf32, 3xi64) + reshape_72 = paddle._C_ops.reshape(roll_7, stack_65) + del stack_65 + + # pd_op.full: (xf32) <- () + full_8 = paddle._C_ops.full( + [], + float("0.969565"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_95 = full_8 + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_48 = paddle._C_ops.shape64(reshape_72) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_88 = paddle._C_ops.slice( + shape64_48, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_48 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_68 = [slice_88, full_40, full_40] + del slice_88 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_66 = paddle._C_ops.stack(combine_68, 0) + del combine_68 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_12 = paddle._C_ops.uniform( + stack_66, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_66 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_186 = paddle._C_ops.add(full_8, uniform_12) + del uniform_12 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_12 = paddle._C_ops.floor(add_186) + del add_186 + + # pd_op.divide: (-1x196x768xf32) <- (-1x196x768xf32, xf32) + divide_12 = paddle._C_ops.divide(reshape_72, full_8) + + # pd_op.multiply: (-1x196x768xf32) <- (-1x196x768xf32, -1x1x1xf32) + multiply_12 = paddle._C_ops.multiply(divide_12, floor_12) + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_53 = paddle._C_ops.add(add_48, multiply_12) + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_54, layer_norm_55, layer_norm_56 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_53, parameter_204, parameter_203, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_203, parameter_204 + + # pd_op.matmul: (-1x196x3072xf32) <- (-1x196x768xf32, 768x3072xf32) + matmul_40 = paddle._C_ops.matmul(layer_norm_54, parameter_202, False, False) + del parameter_202 + + # pd_op.add: (-1x196x3072xf32) <- (-1x196x3072xf32, 3072xf32) + add_54 = paddle._C_ops.add(matmul_40, parameter_201) + del parameter_201 + + # pd_op.gelu: (-1x196x3072xf32) <- (-1x196x3072xf32) + gelu_7 = paddle._C_ops.gelu(add_54, False) + + # pd_op.matmul: (-1x196x768xf32) <- (-1x196x3072xf32, 3072x768xf32) + matmul_41 = paddle._C_ops.matmul(gelu_7, parameter_200, False, False) + del parameter_200 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, 768xf32) + add_55 = paddle._C_ops.add(matmul_41, parameter_199) + del parameter_199 + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_49 = paddle._C_ops.shape64(add_55) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_89 = paddle._C_ops.slice( + shape64_49, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_49 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_69 = [slice_89, full_40, full_40] + del slice_89 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_67 = paddle._C_ops.stack(combine_69, 0) + del combine_69 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_13 = paddle._C_ops.uniform( + stack_67, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_67 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_187 = paddle._C_ops.add(full_8, uniform_13) + del uniform_13 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_13 = paddle._C_ops.floor(add_187) + del add_187 + + # pd_op.divide: (-1x196x768xf32) <- (-1x196x768xf32, xf32) + divide_13 = paddle._C_ops.divide(add_55, full_8) + + # pd_op.multiply: (-1x196x768xf32) <- (-1x196x768xf32, -1x1x1xf32) + multiply_13 = paddle._C_ops.multiply(divide_13, floor_13) + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_56 = paddle._C_ops.add(add_53, multiply_13) + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_50 = paddle._C_ops.shape64(add_56) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_90 = paddle._C_ops.slice( + shape64_50, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_50 + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_57, layer_norm_58, layer_norm_59 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_56, parameter_198, parameter_197, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_197, parameter_198 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_70 = [slice_90, full_56, full_56, full_44] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_68 = paddle._C_ops.stack(combine_70, 0) + del combine_70 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x196x768xf32, 4xi64) + reshape_73 = paddle._C_ops.reshape(layer_norm_57, stack_68) + del stack_68 + + # pd_op.shape64: (4xi64) <- (-1x14x14x768xf32) + shape64_51 = paddle._C_ops.shape64(reshape_73) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_91 = paddle._C_ops.slice( + shape64_51, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_51 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_71 = [slice_91, full_57, full_28, full_57, full_28, full_44] + del slice_91 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_69 = paddle._C_ops.stack(combine_71, 0) + del combine_71 + + # pd_op.reshape: (-1x2x7x2x7x768xf32) <- (-1x14x14x768xf32, 6xi64) + reshape_261 = paddle._C_ops.reshape(reshape_73, stack_69) + del stack_69 + + # pd_op.transpose: (-1x2x2x7x7x768xf32) <- (-1x2x7x2x7x768xf32) + transpose_49 = paddle._C_ops.transpose(reshape_261, [0, 1, 3, 2, 4, 5]) + del reshape_261 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x2x2x7x7x768xf32, 4xi64) + reshape_74 = paddle._C_ops.reshape(transpose_49, full_int_array_38) + + # pd_op.reshape: (-1x49x768xf32) <- (-1x7x7x768xf32, 3xi64) + reshape_75 = paddle._C_ops.reshape(reshape_74, full_int_array_39) + + # pd_op.shape64: (3xi64) <- (-1x49x768xf32) + shape64_52 = paddle._C_ops.shape64(reshape_75) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_92 = paddle._C_ops.slice( + shape64_52, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_52 + + # pd_op.matmul: (-1x49x2304xf32) <- (-1x49x768xf32, 768x2304xf32) + matmul_42 = paddle._C_ops.matmul(reshape_75, parameter_196, False, False) + del parameter_196 + + # pd_op.add: (-1x49x2304xf32) <- (-1x49x2304xf32, 2304xf32) + add_57 = paddle._C_ops.add(matmul_42, parameter_195) + del parameter_195 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_72 = [slice_92, full_29, full_30, full_58, full_32] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_70 = paddle._C_ops.stack(combine_72, 0) + del combine_72 + + # pd_op.reshape: (-1x49x3x24x32xf32) <- (-1x49x2304xf32, 5xi64) + reshape_262 = paddle._C_ops.reshape(add_57, stack_70) + del stack_70 + + # pd_op.transpose: (3x-1x24x49x32xf32) <- (-1x49x3x24x32xf32) + transpose_50 = paddle._C_ops.transpose(reshape_262, [2, 0, 3, 1, 4]) + del reshape_262 + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_93 = paddle._C_ops.slice( + transpose_50, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_94 = paddle._C_ops.slice( + transpose_50, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_8 = paddle._C_ops.slice( + transpose_50, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x24x49x32xf32) <- (-1x24x49x32xf32, 1xf32) + scale_8 = paddle._C_ops.scale(slice_93, full_0, float("0"), True) + del slice_93 + + # pd_op.transpose: (-1x24x32x49xf32) <- (-1x24x49x32xf32) + transpose_51 = paddle._C_ops.transpose(slice_94, [0, 1, 3, 2]) + del slice_94 + + # pd_op.matmul: (-1x24x49x49xf32) <- (-1x24x49x32xf32, -1x24x32x49xf32) + matmul_43 = paddle._C_ops.matmul(scale_8, transpose_51, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_76 = paddle._C_ops.reshape(data_17, full_int_array_12) + del data_17 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_8 = paddle._C_ops.index_select(data_18, reshape_76, 0) + del data_18 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_263 = paddle._C_ops.reshape(index_select_8, full_int_array_13) + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_52 = paddle._C_ops.transpose(reshape_263, [2, 0, 1]) + del reshape_263 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_12 = paddle._C_ops.unsqueeze(transpose_52, full_int_array_7) + + # pd_op.add: (-1x24x49x49xf32) <- (-1x24x49x49xf32, 1x24x49x49xf32) + add_188 = paddle._C_ops.add(matmul_43, unsqueeze_12) + + # pd_op.softmax: (-1x24x49x49xf32) <- (-1x24x49x49xf32) + softmax_8 = paddle._C_ops.softmax(add_188, -1) + del add_188 + + # pd_op.matmul: (-1x24x49x32xf32) <- (-1x24x49x49xf32, -1x24x49x32xf32) + matmul_132 = paddle._C_ops.matmul(softmax_8, slice_8, False, False) + + # pd_op.transpose: (-1x49x24x32xf32) <- (-1x24x49x32xf32) + transpose_53 = paddle._C_ops.transpose(matmul_132, [0, 2, 1, 3]) + del matmul_132 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_73 = [slice_92, full_29, full_44] + del slice_92 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_71 = paddle._C_ops.stack(combine_73, 0) + del combine_73 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x49x24x32xf32, 3xi64) + reshape_77 = paddle._C_ops.reshape(transpose_53, stack_71) + del stack_71 + + # pd_op.matmul: (-1x49x768xf32) <- (-1x49x768xf32, 768x768xf32) + matmul_44 = paddle._C_ops.matmul(reshape_77, parameter_194, False, False) + del parameter_194 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, 768xf32) + add_58 = paddle._C_ops.add(matmul_44, parameter_193) + del parameter_193 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x49x768xf32, 4xi64) + reshape_78 = paddle._C_ops.reshape(add_58, full_int_array_38) + + # pd_op.reshape: (-1x2x2x7x7x768xf32) <- (-1x7x7x768xf32, 6xi64) + reshape_264 = paddle._C_ops.reshape(reshape_78, full_int_array_40) + + # pd_op.transpose: (-1x2x7x2x7x768xf32) <- (-1x2x2x7x7x768xf32) + transpose_54 = paddle._C_ops.transpose(reshape_264, [0, 1, 3, 2, 4, 5]) + del reshape_264 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x2x7x2x7x768xf32, 4xi64) + reshape_79 = paddle._C_ops.reshape(transpose_54, full_int_array_41) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_74 = [slice_90, full_59, full_44] + del slice_90 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_72 = paddle._C_ops.stack(combine_74, 0) + del combine_74 + + # pd_op.reshape: (-1x196x768xf32) <- (-1x14x14x768xf32, 3xi64) + reshape_80 = paddle._C_ops.reshape(reshape_79, stack_72) + del stack_72 + + # pd_op.full: (xf32) <- () + full_9 = paddle._C_ops.full( + [], + float("0.965217"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_104 = full_9 + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_53 = paddle._C_ops.shape64(reshape_80) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_95 = paddle._C_ops.slice( + shape64_53, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_53 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_75 = [slice_95, full_40, full_40] + del slice_95 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_73 = paddle._C_ops.stack(combine_75, 0) + del combine_75 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_14 = paddle._C_ops.uniform( + stack_73, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_73 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_189 = paddle._C_ops.add(full_9, uniform_14) + del uniform_14 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_14 = paddle._C_ops.floor(add_189) + del add_189 + + # pd_op.divide: (-1x196x768xf32) <- (-1x196x768xf32, xf32) + divide_14 = paddle._C_ops.divide(reshape_80, full_9) + + # pd_op.multiply: (-1x196x768xf32) <- (-1x196x768xf32, -1x1x1xf32) + multiply_14 = paddle._C_ops.multiply(divide_14, floor_14) + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_59 = paddle._C_ops.add(add_56, multiply_14) + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_60, layer_norm_61, layer_norm_62 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_59, parameter_192, parameter_191, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_191, parameter_192 + + # pd_op.matmul: (-1x196x3072xf32) <- (-1x196x768xf32, 768x3072xf32) + matmul_45 = paddle._C_ops.matmul(layer_norm_60, parameter_190, False, False) + del parameter_190 + + # pd_op.add: (-1x196x3072xf32) <- (-1x196x3072xf32, 3072xf32) + add_60 = paddle._C_ops.add(matmul_45, parameter_189) + del parameter_189 + + # pd_op.gelu: (-1x196x3072xf32) <- (-1x196x3072xf32) + gelu_8 = paddle._C_ops.gelu(add_60, False) + + # pd_op.matmul: (-1x196x768xf32) <- (-1x196x3072xf32, 3072x768xf32) + matmul_46 = paddle._C_ops.matmul(gelu_8, parameter_188, False, False) + del parameter_188 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, 768xf32) + add_61 = paddle._C_ops.add(matmul_46, parameter_187) + del parameter_187 + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_54 = paddle._C_ops.shape64(add_61) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_96 = paddle._C_ops.slice( + shape64_54, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_54 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_76 = [slice_96, full_40, full_40] + del slice_96 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_74 = paddle._C_ops.stack(combine_76, 0) + del combine_76 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_15 = paddle._C_ops.uniform( + stack_74, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_74 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_190 = paddle._C_ops.add(full_9, uniform_15) + del uniform_15 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_15 = paddle._C_ops.floor(add_190) + del add_190 + + # pd_op.divide: (-1x196x768xf32) <- (-1x196x768xf32, xf32) + divide_15 = paddle._C_ops.divide(add_61, full_9) + + # pd_op.multiply: (-1x196x768xf32) <- (-1x196x768xf32, -1x1x1xf32) + multiply_15 = paddle._C_ops.multiply(divide_15, floor_15) + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_62 = paddle._C_ops.add(add_59, multiply_15) + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_55 = paddle._C_ops.shape64(add_62) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_97 = paddle._C_ops.slice( + shape64_55, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_55 + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_63, layer_norm_64, layer_norm_65 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_62, parameter_186, parameter_185, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_185, parameter_186 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_77 = [slice_97, full_56, full_56, full_44] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_75 = paddle._C_ops.stack(combine_77, 0) + del combine_77 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x196x768xf32, 4xi64) + reshape_81 = paddle._C_ops.reshape(layer_norm_63, stack_75) + del stack_75 + + # pd_op.shape64: (4xi64) <- (-1x14x14x768xf32) + shape64_56 = paddle._C_ops.shape64(reshape_81) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_98 = paddle._C_ops.slice( + shape64_56, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_56 + + # pd_op.roll: (-1x14x14x768xf32) <- (-1x14x14x768xf32, 2xi64) + roll_8 = paddle._C_ops.roll(reshape_81, full_int_array_2, [1, 2]) + + # pd_op.shape64: (4xi64) <- (-1x14x14x768xf32) + shape64_57 = paddle._C_ops.shape64(roll_8) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_99 = paddle._C_ops.slice( + shape64_57, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_57 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_78 = [slice_99, full_57, full_28, full_57, full_28, full_44] + del slice_99 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_76 = paddle._C_ops.stack(combine_78, 0) + del combine_78 + + # pd_op.reshape: (-1x2x7x2x7x768xf32) <- (-1x14x14x768xf32, 6xi64) + reshape_265 = paddle._C_ops.reshape(roll_8, stack_76) + del stack_76 + + # pd_op.transpose: (-1x2x2x7x7x768xf32) <- (-1x2x7x2x7x768xf32) + transpose_55 = paddle._C_ops.transpose(reshape_265, [0, 1, 3, 2, 4, 5]) + del reshape_265 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x2x2x7x7x768xf32, 4xi64) + reshape_82 = paddle._C_ops.reshape(transpose_55, full_int_array_38) + + # pd_op.reshape: (-1x49x768xf32) <- (-1x7x7x768xf32, 3xi64) + reshape_83 = paddle._C_ops.reshape(reshape_82, full_int_array_39) + + # pd_op.full: (1x14x14x1xf32) <- () + full_65 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__44 = paddle._C_ops.set_value_( + full_65, + full_int_array_16, + full_int_array_17, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_65 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__45 = paddle._C_ops.set_value_( + set_value__44, + full_int_array_19, + full_int_array_20, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__44 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__46 = paddle._C_ops.set_value_( + set_value__45, + full_int_array_21, + full_int_array_22, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__45 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__47 = paddle._C_ops.set_value_( + set_value__46, + full_int_array_23, + full_int_array_24, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__46 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__48 = paddle._C_ops.set_value_( + set_value__47, + full_int_array_17, + full_int_array_2, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__47 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__49 = paddle._C_ops.set_value_( + set_value__48, + full_int_array_20, + full_int_array_25, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__48 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__50 = paddle._C_ops.set_value_( + set_value__49, + full_int_array_26, + full_int_array_27, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__49 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__51 = paddle._C_ops.set_value_( + set_value__50, + full_int_array_24, + full_int_array_28, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__50 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__4 = paddle._C_ops.set_value_( + set_value__51, + full_int_array_2, + full_int_array_29, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__51 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_266 = paddle._C_ops.reshape(set_value__4, full_int_array_42) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_150 = paddle._C_ops.transpose(reshape_266, [0, 1, 3, 2, 4, 5]) + del reshape_266 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_267 = paddle._C_ops.reshape(transpose_150, full_int_array_31) + del transpose_150 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_268 = paddle._C_ops.reshape(reshape_267, full_int_array_32) + del reshape_267 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_49 = paddle._C_ops.unsqueeze(reshape_268, full_int_array_8) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_50 = paddle._C_ops.unsqueeze(reshape_268, full_int_array_0) + del reshape_268 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_4 = paddle._C_ops.subtract(unsqueeze_49, unsqueeze_50) + del unsqueeze_49, unsqueeze_50 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_4 = paddle._C_ops.not_equal(subtract_4, full_35) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_8 = paddle._C_ops.where(not_equal_4, full_61, subtract_4) + del not_equal_4, subtract_4 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_4 = paddle._C_ops.equal(where_8, full_35) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_9 = paddle._C_ops.where(equal_4, full_62, where_8) + del equal_4, where_8 + + # pd_op.shape64: (3xi64) <- (-1x49x768xf32) + shape64_58 = paddle._C_ops.shape64(reshape_83) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_100 = paddle._C_ops.slice( + shape64_58, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_58 + + # pd_op.matmul: (-1x49x2304xf32) <- (-1x49x768xf32, 768x2304xf32) + matmul_47 = paddle._C_ops.matmul(reshape_83, parameter_184, False, False) + del parameter_184 + + # pd_op.add: (-1x49x2304xf32) <- (-1x49x2304xf32, 2304xf32) + add_63 = paddle._C_ops.add(matmul_47, parameter_183) + del parameter_183 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_79 = [slice_100, full_29, full_30, full_58, full_32] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_77 = paddle._C_ops.stack(combine_79, 0) + del combine_79 + + # pd_op.reshape: (-1x49x3x24x32xf32) <- (-1x49x2304xf32, 5xi64) + reshape_269 = paddle._C_ops.reshape(add_63, stack_77) + del stack_77 + + # pd_op.transpose: (3x-1x24x49x32xf32) <- (-1x49x3x24x32xf32) + transpose_56 = paddle._C_ops.transpose(reshape_269, [2, 0, 3, 1, 4]) + del reshape_269 + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_101 = paddle._C_ops.slice( + transpose_56, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_102 = paddle._C_ops.slice( + transpose_56, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_9 = paddle._C_ops.slice( + transpose_56, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x24x49x32xf32) <- (-1x24x49x32xf32, 1xf32) + scale_9 = paddle._C_ops.scale(slice_101, full_0, float("0"), True) + del slice_101 + + # pd_op.transpose: (-1x24x32x49xf32) <- (-1x24x49x32xf32) + transpose_57 = paddle._C_ops.transpose(slice_102, [0, 1, 3, 2]) + del slice_102 + + # pd_op.matmul: (-1x24x49x49xf32) <- (-1x24x49x32xf32, -1x24x32x49xf32) + matmul_48 = paddle._C_ops.matmul(scale_9, transpose_57, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_84 = paddle._C_ops.reshape(data_19, full_int_array_12) + del data_19 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_9 = paddle._C_ops.index_select(data_20, reshape_84, 0) + del data_20 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_270 = paddle._C_ops.reshape(index_select_9, full_int_array_13) + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_58 = paddle._C_ops.transpose(reshape_270, [2, 0, 1]) + del reshape_270 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_13 = paddle._C_ops.unsqueeze(transpose_58, full_int_array_7) + + # pd_op.add: (-1x24x49x49xf32) <- (-1x24x49x49xf32, 1x24x49x49xf32) + add_64 = paddle._C_ops.add(matmul_48, unsqueeze_13) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_4 = paddle._C_ops.floor_divide(slice_100, full_63) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_80 = [floor_divide_4, full_47, full_58, full_29, full_29] + del floor_divide_4 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_78 = paddle._C_ops.stack(combine_80, 0) + del combine_80 + + # pd_op.reshape: (-1x4x24x49x49xf32) <- (-1x24x49x49xf32, 5xi64) + reshape_85 = paddle._C_ops.reshape(add_64, stack_78) + del stack_78 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_51 = paddle._C_ops.unsqueeze(where_9, full_int_array_8) + del where_9 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_14 = paddle._C_ops.unsqueeze(unsqueeze_51, full_int_array_7) + del unsqueeze_51 + + # pd_op.add: (-1x4x24x49x49xf32) <- (-1x4x24x49x49xf32, 1x4x1x49x49xf32) + add_65 = paddle._C_ops.add(reshape_85, unsqueeze_14) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_81 = [slice_100, full_58, full_29, full_29] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_79 = paddle._C_ops.stack(combine_81, 0) + del combine_81 + + # pd_op.reshape: (-1x24x49x49xf32) <- (-1x4x24x49x49xf32, 4xi64) + reshape_271 = paddle._C_ops.reshape(add_65, stack_79) + del stack_79 + + # pd_op.softmax: (-1x24x49x49xf32) <- (-1x24x49x49xf32) + softmax_9 = paddle._C_ops.softmax(reshape_271, -1) + del reshape_271 + + # pd_op.matmul: (-1x24x49x32xf32) <- (-1x24x49x49xf32, -1x24x49x32xf32) + matmul_133 = paddle._C_ops.matmul(softmax_9, slice_9, False, False) + + # pd_op.transpose: (-1x49x24x32xf32) <- (-1x24x49x32xf32) + transpose_59 = paddle._C_ops.transpose(matmul_133, [0, 2, 1, 3]) + del matmul_133 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_82 = [slice_100, full_29, full_44] + del slice_100 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_80 = paddle._C_ops.stack(combine_82, 0) + del combine_82 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x49x24x32xf32, 3xi64) + reshape_86 = paddle._C_ops.reshape(transpose_59, stack_80) + del stack_80 + + # pd_op.matmul: (-1x49x768xf32) <- (-1x49x768xf32, 768x768xf32) + matmul_49 = paddle._C_ops.matmul(reshape_86, parameter_182, False, False) + del parameter_182 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, 768xf32) + add_66 = paddle._C_ops.add(matmul_49, parameter_181) + del parameter_181 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x49x768xf32, 4xi64) + reshape_87 = paddle._C_ops.reshape(add_66, full_int_array_38) + + # pd_op.reshape: (-1x2x2x7x7x768xf32) <- (-1x7x7x768xf32, 6xi64) + reshape_272 = paddle._C_ops.reshape(reshape_87, full_int_array_40) + + # pd_op.transpose: (-1x2x7x2x7x768xf32) <- (-1x2x2x7x7x768xf32) + transpose_60 = paddle._C_ops.transpose(reshape_272, [0, 1, 3, 2, 4, 5]) + del reshape_272 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x2x7x2x7x768xf32, 4xi64) + reshape_88 = paddle._C_ops.reshape(transpose_60, full_int_array_41) + + # pd_op.roll: (-1x14x14x768xf32) <- (-1x14x14x768xf32, 2xi64) + roll_9 = paddle._C_ops.roll(reshape_88, full_int_array_3, [1, 2]) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_83 = [slice_97, full_59, full_44] + del slice_97 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_81 = paddle._C_ops.stack(combine_83, 0) + del combine_83 + + # pd_op.reshape: (-1x196x768xf32) <- (-1x14x14x768xf32, 3xi64) + reshape_89 = paddle._C_ops.reshape(roll_9, stack_81) + del stack_81 + + # pd_op.full: (xf32) <- () + full_10 = paddle._C_ops.full( + [], + float("0.96087"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_115 = full_10 + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_59 = paddle._C_ops.shape64(reshape_89) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_103 = paddle._C_ops.slice( + shape64_59, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_59 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_84 = [slice_103, full_40, full_40] + del slice_103 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_82 = paddle._C_ops.stack(combine_84, 0) + del combine_84 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_16 = paddle._C_ops.uniform( + stack_82, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_82 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_191 = paddle._C_ops.add(full_10, uniform_16) + del uniform_16 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_16 = paddle._C_ops.floor(add_191) + del add_191 + + # pd_op.divide: (-1x196x768xf32) <- (-1x196x768xf32, xf32) + divide_16 = paddle._C_ops.divide(reshape_89, full_10) + + # pd_op.multiply: (-1x196x768xf32) <- (-1x196x768xf32, -1x1x1xf32) + multiply_16 = paddle._C_ops.multiply(divide_16, floor_16) + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_67 = paddle._C_ops.add(add_62, multiply_16) + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_66, layer_norm_67, layer_norm_68 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_67, parameter_180, parameter_179, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_179, parameter_180 + + # pd_op.matmul: (-1x196x3072xf32) <- (-1x196x768xf32, 768x3072xf32) + matmul_50 = paddle._C_ops.matmul(layer_norm_66, parameter_178, False, False) + del parameter_178 + + # pd_op.add: (-1x196x3072xf32) <- (-1x196x3072xf32, 3072xf32) + add_68 = paddle._C_ops.add(matmul_50, parameter_177) + del parameter_177 + + # pd_op.gelu: (-1x196x3072xf32) <- (-1x196x3072xf32) + gelu_9 = paddle._C_ops.gelu(add_68, False) + + # pd_op.matmul: (-1x196x768xf32) <- (-1x196x3072xf32, 3072x768xf32) + matmul_51 = paddle._C_ops.matmul(gelu_9, parameter_176, False, False) + del parameter_176 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, 768xf32) + add_69 = paddle._C_ops.add(matmul_51, parameter_175) + del parameter_175 + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_60 = paddle._C_ops.shape64(add_69) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_104 = paddle._C_ops.slice( + shape64_60, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_60 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_85 = [slice_104, full_40, full_40] + del slice_104 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_83 = paddle._C_ops.stack(combine_85, 0) + del combine_85 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_17 = paddle._C_ops.uniform( + stack_83, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_83 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_192 = paddle._C_ops.add(full_10, uniform_17) + del uniform_17 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_17 = paddle._C_ops.floor(add_192) + del add_192 + + # pd_op.divide: (-1x196x768xf32) <- (-1x196x768xf32, xf32) + divide_17 = paddle._C_ops.divide(add_69, full_10) + + # pd_op.multiply: (-1x196x768xf32) <- (-1x196x768xf32, -1x1x1xf32) + multiply_17 = paddle._C_ops.multiply(divide_17, floor_17) + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_70 = paddle._C_ops.add(add_67, multiply_17) + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_61 = paddle._C_ops.shape64(add_70) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_105 = paddle._C_ops.slice( + shape64_61, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_61 + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_69, layer_norm_70, layer_norm_71 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_70, parameter_174, parameter_173, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_173, parameter_174 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_86 = [slice_105, full_56, full_56, full_44] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_84 = paddle._C_ops.stack(combine_86, 0) + del combine_86 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x196x768xf32, 4xi64) + reshape_90 = paddle._C_ops.reshape(layer_norm_69, stack_84) + del stack_84 + + # pd_op.shape64: (4xi64) <- (-1x14x14x768xf32) + shape64_62 = paddle._C_ops.shape64(reshape_90) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_106 = paddle._C_ops.slice( + shape64_62, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_62 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_87 = [slice_106, full_57, full_28, full_57, full_28, full_44] + del slice_106 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_85 = paddle._C_ops.stack(combine_87, 0) + del combine_87 + + # pd_op.reshape: (-1x2x7x2x7x768xf32) <- (-1x14x14x768xf32, 6xi64) + reshape_273 = paddle._C_ops.reshape(reshape_90, stack_85) + del stack_85 + + # pd_op.transpose: (-1x2x2x7x7x768xf32) <- (-1x2x7x2x7x768xf32) + transpose_61 = paddle._C_ops.transpose(reshape_273, [0, 1, 3, 2, 4, 5]) + del reshape_273 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x2x2x7x7x768xf32, 4xi64) + reshape_91 = paddle._C_ops.reshape(transpose_61, full_int_array_38) + + # pd_op.reshape: (-1x49x768xf32) <- (-1x7x7x768xf32, 3xi64) + reshape_92 = paddle._C_ops.reshape(reshape_91, full_int_array_39) + + # pd_op.shape64: (3xi64) <- (-1x49x768xf32) + shape64_63 = paddle._C_ops.shape64(reshape_92) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_107 = paddle._C_ops.slice( + shape64_63, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_63 + + # pd_op.matmul: (-1x49x2304xf32) <- (-1x49x768xf32, 768x2304xf32) + matmul_52 = paddle._C_ops.matmul(reshape_92, parameter_172, False, False) + del parameter_172 + + # pd_op.add: (-1x49x2304xf32) <- (-1x49x2304xf32, 2304xf32) + add_71 = paddle._C_ops.add(matmul_52, parameter_171) + del parameter_171 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_88 = [slice_107, full_29, full_30, full_58, full_32] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_86 = paddle._C_ops.stack(combine_88, 0) + del combine_88 + + # pd_op.reshape: (-1x49x3x24x32xf32) <- (-1x49x2304xf32, 5xi64) + reshape_274 = paddle._C_ops.reshape(add_71, stack_86) + del stack_86 + + # pd_op.transpose: (3x-1x24x49x32xf32) <- (-1x49x3x24x32xf32) + transpose_62 = paddle._C_ops.transpose(reshape_274, [2, 0, 3, 1, 4]) + del reshape_274 + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_108 = paddle._C_ops.slice( + transpose_62, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_109 = paddle._C_ops.slice( + transpose_62, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_10 = paddle._C_ops.slice( + transpose_62, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x24x49x32xf32) <- (-1x24x49x32xf32, 1xf32) + scale_10 = paddle._C_ops.scale(slice_108, full_0, float("0"), True) + del slice_108 + + # pd_op.transpose: (-1x24x32x49xf32) <- (-1x24x49x32xf32) + transpose_63 = paddle._C_ops.transpose(slice_109, [0, 1, 3, 2]) + del slice_109 + + # pd_op.matmul: (-1x24x49x49xf32) <- (-1x24x49x32xf32, -1x24x32x49xf32) + matmul_53 = paddle._C_ops.matmul(scale_10, transpose_63, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_93 = paddle._C_ops.reshape(data_21, full_int_array_12) + del data_21 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_10 = paddle._C_ops.index_select(data_22, reshape_93, 0) + del data_22 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_275 = paddle._C_ops.reshape(index_select_10, full_int_array_13) + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_64 = paddle._C_ops.transpose(reshape_275, [2, 0, 1]) + del reshape_275 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_15 = paddle._C_ops.unsqueeze(transpose_64, full_int_array_7) + + # pd_op.add: (-1x24x49x49xf32) <- (-1x24x49x49xf32, 1x24x49x49xf32) + add_193 = paddle._C_ops.add(matmul_53, unsqueeze_15) + + # pd_op.softmax: (-1x24x49x49xf32) <- (-1x24x49x49xf32) + softmax_10 = paddle._C_ops.softmax(add_193, -1) + del add_193 + + # pd_op.matmul: (-1x24x49x32xf32) <- (-1x24x49x49xf32, -1x24x49x32xf32) + matmul_134 = paddle._C_ops.matmul(softmax_10, slice_10, False, False) + + # pd_op.transpose: (-1x49x24x32xf32) <- (-1x24x49x32xf32) + transpose_65 = paddle._C_ops.transpose(matmul_134, [0, 2, 1, 3]) + del matmul_134 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_89 = [slice_107, full_29, full_44] + del slice_107 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_87 = paddle._C_ops.stack(combine_89, 0) + del combine_89 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x49x24x32xf32, 3xi64) + reshape_94 = paddle._C_ops.reshape(transpose_65, stack_87) + del stack_87 + + # pd_op.matmul: (-1x49x768xf32) <- (-1x49x768xf32, 768x768xf32) + matmul_54 = paddle._C_ops.matmul(reshape_94, parameter_170, False, False) + del parameter_170 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, 768xf32) + add_72 = paddle._C_ops.add(matmul_54, parameter_169) + del parameter_169 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x49x768xf32, 4xi64) + reshape_95 = paddle._C_ops.reshape(add_72, full_int_array_38) + + # pd_op.reshape: (-1x2x2x7x7x768xf32) <- (-1x7x7x768xf32, 6xi64) + reshape_276 = paddle._C_ops.reshape(reshape_95, full_int_array_40) + + # pd_op.transpose: (-1x2x7x2x7x768xf32) <- (-1x2x2x7x7x768xf32) + transpose_66 = paddle._C_ops.transpose(reshape_276, [0, 1, 3, 2, 4, 5]) + del reshape_276 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x2x7x2x7x768xf32, 4xi64) + reshape_96 = paddle._C_ops.reshape(transpose_66, full_int_array_41) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_90 = [slice_105, full_59, full_44] + del slice_105 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_88 = paddle._C_ops.stack(combine_90, 0) + del combine_90 + + # pd_op.reshape: (-1x196x768xf32) <- (-1x14x14x768xf32, 3xi64) + reshape_97 = paddle._C_ops.reshape(reshape_96, stack_88) + del stack_88 + + # pd_op.full: (xf32) <- () + full_11 = paddle._C_ops.full( + [], + float("0.956522"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_124 = full_11 + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_64 = paddle._C_ops.shape64(reshape_97) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_110 = paddle._C_ops.slice( + shape64_64, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_64 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_91 = [slice_110, full_40, full_40] + del slice_110 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_89 = paddle._C_ops.stack(combine_91, 0) + del combine_91 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_18 = paddle._C_ops.uniform( + stack_89, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_89 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_194 = paddle._C_ops.add(full_11, uniform_18) + del uniform_18 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_18 = paddle._C_ops.floor(add_194) + del add_194 + + # pd_op.divide: (-1x196x768xf32) <- (-1x196x768xf32, xf32) + divide_18 = paddle._C_ops.divide(reshape_97, full_11) + + # pd_op.multiply: (-1x196x768xf32) <- (-1x196x768xf32, -1x1x1xf32) + multiply_18 = paddle._C_ops.multiply(divide_18, floor_18) + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_73 = paddle._C_ops.add(add_70, multiply_18) + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_72, layer_norm_73, layer_norm_74 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_73, parameter_168, parameter_167, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_167, parameter_168 + + # pd_op.matmul: (-1x196x3072xf32) <- (-1x196x768xf32, 768x3072xf32) + matmul_55 = paddle._C_ops.matmul(layer_norm_72, parameter_166, False, False) + del parameter_166 + + # pd_op.add: (-1x196x3072xf32) <- (-1x196x3072xf32, 3072xf32) + add_74 = paddle._C_ops.add(matmul_55, parameter_165) + del parameter_165 + + # pd_op.gelu: (-1x196x3072xf32) <- (-1x196x3072xf32) + gelu_10 = paddle._C_ops.gelu(add_74, False) + + # pd_op.matmul: (-1x196x768xf32) <- (-1x196x3072xf32, 3072x768xf32) + matmul_56 = paddle._C_ops.matmul(gelu_10, parameter_164, False, False) + del parameter_164 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, 768xf32) + add_75 = paddle._C_ops.add(matmul_56, parameter_163) + del parameter_163 + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_65 = paddle._C_ops.shape64(add_75) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_111 = paddle._C_ops.slice( + shape64_65, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_65 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_92 = [slice_111, full_40, full_40] + del slice_111 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_90 = paddle._C_ops.stack(combine_92, 0) + del combine_92 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_19 = paddle._C_ops.uniform( + stack_90, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_90 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_195 = paddle._C_ops.add(full_11, uniform_19) + del uniform_19 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_19 = paddle._C_ops.floor(add_195) + del add_195 + + # pd_op.divide: (-1x196x768xf32) <- (-1x196x768xf32, xf32) + divide_19 = paddle._C_ops.divide(add_75, full_11) + + # pd_op.multiply: (-1x196x768xf32) <- (-1x196x768xf32, -1x1x1xf32) + multiply_19 = paddle._C_ops.multiply(divide_19, floor_19) + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_76 = paddle._C_ops.add(add_73, multiply_19) + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_66 = paddle._C_ops.shape64(add_76) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_112 = paddle._C_ops.slice( + shape64_66, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_66 + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_75, layer_norm_76, layer_norm_77 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_76, parameter_162, parameter_161, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_161, parameter_162 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_93 = [slice_112, full_56, full_56, full_44] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_91 = paddle._C_ops.stack(combine_93, 0) + del combine_93 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x196x768xf32, 4xi64) + reshape_98 = paddle._C_ops.reshape(layer_norm_75, stack_91) + del stack_91 + + # pd_op.shape64: (4xi64) <- (-1x14x14x768xf32) + shape64_67 = paddle._C_ops.shape64(reshape_98) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_113 = paddle._C_ops.slice( + shape64_67, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_67 + + # pd_op.roll: (-1x14x14x768xf32) <- (-1x14x14x768xf32, 2xi64) + roll_10 = paddle._C_ops.roll(reshape_98, full_int_array_2, [1, 2]) + + # pd_op.shape64: (4xi64) <- (-1x14x14x768xf32) + shape64_68 = paddle._C_ops.shape64(roll_10) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_114 = paddle._C_ops.slice( + shape64_68, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_68 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_94 = [slice_114, full_57, full_28, full_57, full_28, full_44] + del slice_114 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_92 = paddle._C_ops.stack(combine_94, 0) + del combine_94 + + # pd_op.reshape: (-1x2x7x2x7x768xf32) <- (-1x14x14x768xf32, 6xi64) + reshape_277 = paddle._C_ops.reshape(roll_10, stack_92) + del stack_92 + + # pd_op.transpose: (-1x2x2x7x7x768xf32) <- (-1x2x7x2x7x768xf32) + transpose_67 = paddle._C_ops.transpose(reshape_277, [0, 1, 3, 2, 4, 5]) + del reshape_277 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x2x2x7x7x768xf32, 4xi64) + reshape_99 = paddle._C_ops.reshape(transpose_67, full_int_array_38) + + # pd_op.reshape: (-1x49x768xf32) <- (-1x7x7x768xf32, 3xi64) + reshape_100 = paddle._C_ops.reshape(reshape_99, full_int_array_39) + + # pd_op.full: (1x14x14x1xf32) <- () + full_66 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__52 = paddle._C_ops.set_value_( + full_66, + full_int_array_16, + full_int_array_17, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_66 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__53 = paddle._C_ops.set_value_( + set_value__52, + full_int_array_19, + full_int_array_20, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__52 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__54 = paddle._C_ops.set_value_( + set_value__53, + full_int_array_21, + full_int_array_22, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__53 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__55 = paddle._C_ops.set_value_( + set_value__54, + full_int_array_23, + full_int_array_24, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__54 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__56 = paddle._C_ops.set_value_( + set_value__55, + full_int_array_17, + full_int_array_2, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__55 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__57 = paddle._C_ops.set_value_( + set_value__56, + full_int_array_20, + full_int_array_25, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__56 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__58 = paddle._C_ops.set_value_( + set_value__57, + full_int_array_26, + full_int_array_27, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__57 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__59 = paddle._C_ops.set_value_( + set_value__58, + full_int_array_24, + full_int_array_28, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__58 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__5 = paddle._C_ops.set_value_( + set_value__59, + full_int_array_2, + full_int_array_29, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__59 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_278 = paddle._C_ops.reshape(set_value__5, full_int_array_42) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_151 = paddle._C_ops.transpose(reshape_278, [0, 1, 3, 2, 4, 5]) + del reshape_278 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_279 = paddle._C_ops.reshape(transpose_151, full_int_array_31) + del transpose_151 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_280 = paddle._C_ops.reshape(reshape_279, full_int_array_32) + del reshape_279 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_52 = paddle._C_ops.unsqueeze(reshape_280, full_int_array_8) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_53 = paddle._C_ops.unsqueeze(reshape_280, full_int_array_0) + del reshape_280 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_5 = paddle._C_ops.subtract(unsqueeze_52, unsqueeze_53) + del unsqueeze_52, unsqueeze_53 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_5 = paddle._C_ops.not_equal(subtract_5, full_35) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_10 = paddle._C_ops.where(not_equal_5, full_61, subtract_5) + del not_equal_5, subtract_5 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_5 = paddle._C_ops.equal(where_10, full_35) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_11 = paddle._C_ops.where(equal_5, full_62, where_10) + del equal_5, where_10 + + # pd_op.shape64: (3xi64) <- (-1x49x768xf32) + shape64_69 = paddle._C_ops.shape64(reshape_100) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_115 = paddle._C_ops.slice( + shape64_69, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_69 + + # pd_op.matmul: (-1x49x2304xf32) <- (-1x49x768xf32, 768x2304xf32) + matmul_57 = paddle._C_ops.matmul(reshape_100, parameter_160, False, False) + del parameter_160 + + # pd_op.add: (-1x49x2304xf32) <- (-1x49x2304xf32, 2304xf32) + add_77 = paddle._C_ops.add(matmul_57, parameter_159) + del parameter_159 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_95 = [slice_115, full_29, full_30, full_58, full_32] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_93 = paddle._C_ops.stack(combine_95, 0) + del combine_95 + + # pd_op.reshape: (-1x49x3x24x32xf32) <- (-1x49x2304xf32, 5xi64) + reshape_281 = paddle._C_ops.reshape(add_77, stack_93) + del stack_93 + + # pd_op.transpose: (3x-1x24x49x32xf32) <- (-1x49x3x24x32xf32) + transpose_68 = paddle._C_ops.transpose(reshape_281, [2, 0, 3, 1, 4]) + del reshape_281 + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_116 = paddle._C_ops.slice( + transpose_68, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_117 = paddle._C_ops.slice( + transpose_68, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_11 = paddle._C_ops.slice( + transpose_68, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x24x49x32xf32) <- (-1x24x49x32xf32, 1xf32) + scale_11 = paddle._C_ops.scale(slice_116, full_0, float("0"), True) + del slice_116 + + # pd_op.transpose: (-1x24x32x49xf32) <- (-1x24x49x32xf32) + transpose_69 = paddle._C_ops.transpose(slice_117, [0, 1, 3, 2]) + del slice_117 + + # pd_op.matmul: (-1x24x49x49xf32) <- (-1x24x49x32xf32, -1x24x32x49xf32) + matmul_58 = paddle._C_ops.matmul(scale_11, transpose_69, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_101 = paddle._C_ops.reshape(data_23, full_int_array_12) + del data_23 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_11 = paddle._C_ops.index_select(data_24, reshape_101, 0) + del data_24 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_282 = paddle._C_ops.reshape(index_select_11, full_int_array_13) + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_70 = paddle._C_ops.transpose(reshape_282, [2, 0, 1]) + del reshape_282 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_16 = paddle._C_ops.unsqueeze(transpose_70, full_int_array_7) + + # pd_op.add: (-1x24x49x49xf32) <- (-1x24x49x49xf32, 1x24x49x49xf32) + add_78 = paddle._C_ops.add(matmul_58, unsqueeze_16) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_5 = paddle._C_ops.floor_divide(slice_115, full_63) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_96 = [floor_divide_5, full_47, full_58, full_29, full_29] + del floor_divide_5 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_94 = paddle._C_ops.stack(combine_96, 0) + del combine_96 + + # pd_op.reshape: (-1x4x24x49x49xf32) <- (-1x24x49x49xf32, 5xi64) + reshape_102 = paddle._C_ops.reshape(add_78, stack_94) + del stack_94 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_54 = paddle._C_ops.unsqueeze(where_11, full_int_array_8) + del where_11 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_17 = paddle._C_ops.unsqueeze(unsqueeze_54, full_int_array_7) + del unsqueeze_54 + + # pd_op.add: (-1x4x24x49x49xf32) <- (-1x4x24x49x49xf32, 1x4x1x49x49xf32) + add_79 = paddle._C_ops.add(reshape_102, unsqueeze_17) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_97 = [slice_115, full_58, full_29, full_29] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_95 = paddle._C_ops.stack(combine_97, 0) + del combine_97 + + # pd_op.reshape: (-1x24x49x49xf32) <- (-1x4x24x49x49xf32, 4xi64) + reshape_283 = paddle._C_ops.reshape(add_79, stack_95) + del stack_95 + + # pd_op.softmax: (-1x24x49x49xf32) <- (-1x24x49x49xf32) + softmax_11 = paddle._C_ops.softmax(reshape_283, -1) + del reshape_283 + + # pd_op.matmul: (-1x24x49x32xf32) <- (-1x24x49x49xf32, -1x24x49x32xf32) + matmul_135 = paddle._C_ops.matmul(softmax_11, slice_11, False, False) + + # pd_op.transpose: (-1x49x24x32xf32) <- (-1x24x49x32xf32) + transpose_71 = paddle._C_ops.transpose(matmul_135, [0, 2, 1, 3]) + del matmul_135 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_98 = [slice_115, full_29, full_44] + del slice_115 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_96 = paddle._C_ops.stack(combine_98, 0) + del combine_98 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x49x24x32xf32, 3xi64) + reshape_103 = paddle._C_ops.reshape(transpose_71, stack_96) + del stack_96 + + # pd_op.matmul: (-1x49x768xf32) <- (-1x49x768xf32, 768x768xf32) + matmul_59 = paddle._C_ops.matmul(reshape_103, parameter_158, False, False) + del parameter_158 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, 768xf32) + add_80 = paddle._C_ops.add(matmul_59, parameter_157) + del parameter_157 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x49x768xf32, 4xi64) + reshape_104 = paddle._C_ops.reshape(add_80, full_int_array_38) + + # pd_op.reshape: (-1x2x2x7x7x768xf32) <- (-1x7x7x768xf32, 6xi64) + reshape_284 = paddle._C_ops.reshape(reshape_104, full_int_array_40) + + # pd_op.transpose: (-1x2x7x2x7x768xf32) <- (-1x2x2x7x7x768xf32) + transpose_72 = paddle._C_ops.transpose(reshape_284, [0, 1, 3, 2, 4, 5]) + del reshape_284 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x2x7x2x7x768xf32, 4xi64) + reshape_105 = paddle._C_ops.reshape(transpose_72, full_int_array_41) + + # pd_op.roll: (-1x14x14x768xf32) <- (-1x14x14x768xf32, 2xi64) + roll_11 = paddle._C_ops.roll(reshape_105, full_int_array_3, [1, 2]) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_99 = [slice_112, full_59, full_44] + del slice_112 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_97 = paddle._C_ops.stack(combine_99, 0) + del combine_99 + + # pd_op.reshape: (-1x196x768xf32) <- (-1x14x14x768xf32, 3xi64) + reshape_106 = paddle._C_ops.reshape(roll_11, stack_97) + del stack_97 + + # pd_op.full: (xf32) <- () + full_12 = paddle._C_ops.full( + [], + float("0.952174"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_135 = full_12 + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_70 = paddle._C_ops.shape64(reshape_106) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_118 = paddle._C_ops.slice( + shape64_70, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_70 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_100 = [slice_118, full_40, full_40] + del slice_118 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_98 = paddle._C_ops.stack(combine_100, 0) + del combine_100 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_20 = paddle._C_ops.uniform( + stack_98, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_98 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_196 = paddle._C_ops.add(full_12, uniform_20) + del uniform_20 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_20 = paddle._C_ops.floor(add_196) + del add_196 + + # pd_op.divide: (-1x196x768xf32) <- (-1x196x768xf32, xf32) + divide_20 = paddle._C_ops.divide(reshape_106, full_12) + + # pd_op.multiply: (-1x196x768xf32) <- (-1x196x768xf32, -1x1x1xf32) + multiply_20 = paddle._C_ops.multiply(divide_20, floor_20) + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_81 = paddle._C_ops.add(add_76, multiply_20) + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_78, layer_norm_79, layer_norm_80 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_81, parameter_156, parameter_155, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_155, parameter_156 + + # pd_op.matmul: (-1x196x3072xf32) <- (-1x196x768xf32, 768x3072xf32) + matmul_60 = paddle._C_ops.matmul(layer_norm_78, parameter_154, False, False) + del parameter_154 + + # pd_op.add: (-1x196x3072xf32) <- (-1x196x3072xf32, 3072xf32) + add_82 = paddle._C_ops.add(matmul_60, parameter_153) + del parameter_153 + + # pd_op.gelu: (-1x196x3072xf32) <- (-1x196x3072xf32) + gelu_11 = paddle._C_ops.gelu(add_82, False) + + # pd_op.matmul: (-1x196x768xf32) <- (-1x196x3072xf32, 3072x768xf32) + matmul_61 = paddle._C_ops.matmul(gelu_11, parameter_152, False, False) + del parameter_152 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, 768xf32) + add_83 = paddle._C_ops.add(matmul_61, parameter_151) + del parameter_151 + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_71 = paddle._C_ops.shape64(add_83) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_119 = paddle._C_ops.slice( + shape64_71, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_71 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_101 = [slice_119, full_40, full_40] + del slice_119 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_99 = paddle._C_ops.stack(combine_101, 0) + del combine_101 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_21 = paddle._C_ops.uniform( + stack_99, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_99 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_197 = paddle._C_ops.add(full_12, uniform_21) + del uniform_21 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_21 = paddle._C_ops.floor(add_197) + del add_197 + + # pd_op.divide: (-1x196x768xf32) <- (-1x196x768xf32, xf32) + divide_21 = paddle._C_ops.divide(add_83, full_12) + + # pd_op.multiply: (-1x196x768xf32) <- (-1x196x768xf32, -1x1x1xf32) + multiply_21 = paddle._C_ops.multiply(divide_21, floor_21) + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_84 = paddle._C_ops.add(add_81, multiply_21) + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_72 = paddle._C_ops.shape64(add_84) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_120 = paddle._C_ops.slice( + shape64_72, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_72 + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_81, layer_norm_82, layer_norm_83 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_84, parameter_150, parameter_149, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_149, parameter_150 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_102 = [slice_120, full_56, full_56, full_44] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_100 = paddle._C_ops.stack(combine_102, 0) + del combine_102 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x196x768xf32, 4xi64) + reshape_107 = paddle._C_ops.reshape(layer_norm_81, stack_100) + del stack_100 + + # pd_op.shape64: (4xi64) <- (-1x14x14x768xf32) + shape64_73 = paddle._C_ops.shape64(reshape_107) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_121 = paddle._C_ops.slice( + shape64_73, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_73 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_103 = [slice_121, full_57, full_28, full_57, full_28, full_44] + del slice_121 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_101 = paddle._C_ops.stack(combine_103, 0) + del combine_103 + + # pd_op.reshape: (-1x2x7x2x7x768xf32) <- (-1x14x14x768xf32, 6xi64) + reshape_285 = paddle._C_ops.reshape(reshape_107, stack_101) + del stack_101 + + # pd_op.transpose: (-1x2x2x7x7x768xf32) <- (-1x2x7x2x7x768xf32) + transpose_73 = paddle._C_ops.transpose(reshape_285, [0, 1, 3, 2, 4, 5]) + del reshape_285 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x2x2x7x7x768xf32, 4xi64) + reshape_108 = paddle._C_ops.reshape(transpose_73, full_int_array_38) + + # pd_op.reshape: (-1x49x768xf32) <- (-1x7x7x768xf32, 3xi64) + reshape_109 = paddle._C_ops.reshape(reshape_108, full_int_array_39) + + # pd_op.shape64: (3xi64) <- (-1x49x768xf32) + shape64_74 = paddle._C_ops.shape64(reshape_109) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_122 = paddle._C_ops.slice( + shape64_74, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_74 + + # pd_op.matmul: (-1x49x2304xf32) <- (-1x49x768xf32, 768x2304xf32) + matmul_62 = paddle._C_ops.matmul(reshape_109, parameter_148, False, False) + del parameter_148 + + # pd_op.add: (-1x49x2304xf32) <- (-1x49x2304xf32, 2304xf32) + add_85 = paddle._C_ops.add(matmul_62, parameter_147) + del parameter_147 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_104 = [slice_122, full_29, full_30, full_58, full_32] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_102 = paddle._C_ops.stack(combine_104, 0) + del combine_104 + + # pd_op.reshape: (-1x49x3x24x32xf32) <- (-1x49x2304xf32, 5xi64) + reshape_286 = paddle._C_ops.reshape(add_85, stack_102) + del stack_102 + + # pd_op.transpose: (3x-1x24x49x32xf32) <- (-1x49x3x24x32xf32) + transpose_74 = paddle._C_ops.transpose(reshape_286, [2, 0, 3, 1, 4]) + del reshape_286 + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_123 = paddle._C_ops.slice( + transpose_74, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_124 = paddle._C_ops.slice( + transpose_74, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_12 = paddle._C_ops.slice( + transpose_74, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x24x49x32xf32) <- (-1x24x49x32xf32, 1xf32) + scale_12 = paddle._C_ops.scale(slice_123, full_0, float("0"), True) + del slice_123 + + # pd_op.transpose: (-1x24x32x49xf32) <- (-1x24x49x32xf32) + transpose_75 = paddle._C_ops.transpose(slice_124, [0, 1, 3, 2]) + del slice_124 + + # pd_op.matmul: (-1x24x49x49xf32) <- (-1x24x49x32xf32, -1x24x32x49xf32) + matmul_63 = paddle._C_ops.matmul(scale_12, transpose_75, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_110 = paddle._C_ops.reshape(data_25, full_int_array_12) + del data_25 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_12 = paddle._C_ops.index_select(data_26, reshape_110, 0) + del data_26 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_287 = paddle._C_ops.reshape(index_select_12, full_int_array_13) + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_76 = paddle._C_ops.transpose(reshape_287, [2, 0, 1]) + del reshape_287 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_18 = paddle._C_ops.unsqueeze(transpose_76, full_int_array_7) + + # pd_op.add: (-1x24x49x49xf32) <- (-1x24x49x49xf32, 1x24x49x49xf32) + add_198 = paddle._C_ops.add(matmul_63, unsqueeze_18) + + # pd_op.softmax: (-1x24x49x49xf32) <- (-1x24x49x49xf32) + softmax_12 = paddle._C_ops.softmax(add_198, -1) + del add_198 + + # pd_op.matmul: (-1x24x49x32xf32) <- (-1x24x49x49xf32, -1x24x49x32xf32) + matmul_136 = paddle._C_ops.matmul(softmax_12, slice_12, False, False) + + # pd_op.transpose: (-1x49x24x32xf32) <- (-1x24x49x32xf32) + transpose_77 = paddle._C_ops.transpose(matmul_136, [0, 2, 1, 3]) + del matmul_136 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_105 = [slice_122, full_29, full_44] + del slice_122 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_103 = paddle._C_ops.stack(combine_105, 0) + del combine_105 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x49x24x32xf32, 3xi64) + reshape_111 = paddle._C_ops.reshape(transpose_77, stack_103) + del stack_103 + + # pd_op.matmul: (-1x49x768xf32) <- (-1x49x768xf32, 768x768xf32) + matmul_64 = paddle._C_ops.matmul(reshape_111, parameter_146, False, False) + del parameter_146 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, 768xf32) + add_86 = paddle._C_ops.add(matmul_64, parameter_145) + del parameter_145 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x49x768xf32, 4xi64) + reshape_112 = paddle._C_ops.reshape(add_86, full_int_array_38) + + # pd_op.reshape: (-1x2x2x7x7x768xf32) <- (-1x7x7x768xf32, 6xi64) + reshape_288 = paddle._C_ops.reshape(reshape_112, full_int_array_40) + + # pd_op.transpose: (-1x2x7x2x7x768xf32) <- (-1x2x2x7x7x768xf32) + transpose_78 = paddle._C_ops.transpose(reshape_288, [0, 1, 3, 2, 4, 5]) + del reshape_288 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x2x7x2x7x768xf32, 4xi64) + reshape_113 = paddle._C_ops.reshape(transpose_78, full_int_array_41) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_106 = [slice_120, full_59, full_44] + del slice_120 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_104 = paddle._C_ops.stack(combine_106, 0) + del combine_106 + + # pd_op.reshape: (-1x196x768xf32) <- (-1x14x14x768xf32, 3xi64) + reshape_114 = paddle._C_ops.reshape(reshape_113, stack_104) + del stack_104 + + # pd_op.full: (xf32) <- () + full_13 = paddle._C_ops.full( + [], + float("0.947826"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_144 = full_13 + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_75 = paddle._C_ops.shape64(reshape_114) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_125 = paddle._C_ops.slice( + shape64_75, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_75 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_107 = [slice_125, full_40, full_40] + del slice_125 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_105 = paddle._C_ops.stack(combine_107, 0) + del combine_107 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_22 = paddle._C_ops.uniform( + stack_105, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_105 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_199 = paddle._C_ops.add(full_13, uniform_22) + del uniform_22 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_22 = paddle._C_ops.floor(add_199) + del add_199 + + # pd_op.divide: (-1x196x768xf32) <- (-1x196x768xf32, xf32) + divide_22 = paddle._C_ops.divide(reshape_114, full_13) + + # pd_op.multiply: (-1x196x768xf32) <- (-1x196x768xf32, -1x1x1xf32) + multiply_22 = paddle._C_ops.multiply(divide_22, floor_22) + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_87 = paddle._C_ops.add(add_84, multiply_22) + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_84, layer_norm_85, layer_norm_86 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_87, parameter_144, parameter_143, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_143, parameter_144 + + # pd_op.matmul: (-1x196x3072xf32) <- (-1x196x768xf32, 768x3072xf32) + matmul_65 = paddle._C_ops.matmul(layer_norm_84, parameter_142, False, False) + del parameter_142 + + # pd_op.add: (-1x196x3072xf32) <- (-1x196x3072xf32, 3072xf32) + add_88 = paddle._C_ops.add(matmul_65, parameter_141) + del parameter_141 + + # pd_op.gelu: (-1x196x3072xf32) <- (-1x196x3072xf32) + gelu_12 = paddle._C_ops.gelu(add_88, False) + + # pd_op.matmul: (-1x196x768xf32) <- (-1x196x3072xf32, 3072x768xf32) + matmul_66 = paddle._C_ops.matmul(gelu_12, parameter_140, False, False) + del parameter_140 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, 768xf32) + add_89 = paddle._C_ops.add(matmul_66, parameter_139) + del parameter_139 + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_76 = paddle._C_ops.shape64(add_89) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_126 = paddle._C_ops.slice( + shape64_76, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_76 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_108 = [slice_126, full_40, full_40] + del slice_126 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_106 = paddle._C_ops.stack(combine_108, 0) + del combine_108 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_23 = paddle._C_ops.uniform( + stack_106, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_106 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_200 = paddle._C_ops.add(full_13, uniform_23) + del uniform_23 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_23 = paddle._C_ops.floor(add_200) + del add_200 + + # pd_op.divide: (-1x196x768xf32) <- (-1x196x768xf32, xf32) + divide_23 = paddle._C_ops.divide(add_89, full_13) + + # pd_op.multiply: (-1x196x768xf32) <- (-1x196x768xf32, -1x1x1xf32) + multiply_23 = paddle._C_ops.multiply(divide_23, floor_23) + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_90 = paddle._C_ops.add(add_87, multiply_23) + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_77 = paddle._C_ops.shape64(add_90) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_127 = paddle._C_ops.slice( + shape64_77, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_77 + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_87, layer_norm_88, layer_norm_89 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_90, parameter_138, parameter_137, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_137, parameter_138 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_109 = [slice_127, full_56, full_56, full_44] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_107 = paddle._C_ops.stack(combine_109, 0) + del combine_109 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x196x768xf32, 4xi64) + reshape_115 = paddle._C_ops.reshape(layer_norm_87, stack_107) + del stack_107 + + # pd_op.shape64: (4xi64) <- (-1x14x14x768xf32) + shape64_78 = paddle._C_ops.shape64(reshape_115) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_128 = paddle._C_ops.slice( + shape64_78, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_78 + + # pd_op.roll: (-1x14x14x768xf32) <- (-1x14x14x768xf32, 2xi64) + roll_12 = paddle._C_ops.roll(reshape_115, full_int_array_2, [1, 2]) + + # pd_op.shape64: (4xi64) <- (-1x14x14x768xf32) + shape64_79 = paddle._C_ops.shape64(roll_12) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_129 = paddle._C_ops.slice( + shape64_79, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_79 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_110 = [slice_129, full_57, full_28, full_57, full_28, full_44] + del slice_129 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_108 = paddle._C_ops.stack(combine_110, 0) + del combine_110 + + # pd_op.reshape: (-1x2x7x2x7x768xf32) <- (-1x14x14x768xf32, 6xi64) + reshape_289 = paddle._C_ops.reshape(roll_12, stack_108) + del stack_108 + + # pd_op.transpose: (-1x2x2x7x7x768xf32) <- (-1x2x7x2x7x768xf32) + transpose_79 = paddle._C_ops.transpose(reshape_289, [0, 1, 3, 2, 4, 5]) + del reshape_289 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x2x2x7x7x768xf32, 4xi64) + reshape_116 = paddle._C_ops.reshape(transpose_79, full_int_array_38) + + # pd_op.reshape: (-1x49x768xf32) <- (-1x7x7x768xf32, 3xi64) + reshape_117 = paddle._C_ops.reshape(reshape_116, full_int_array_39) + + # pd_op.full: (1x14x14x1xf32) <- () + full_67 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__60 = paddle._C_ops.set_value_( + full_67, + full_int_array_16, + full_int_array_17, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_67 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__61 = paddle._C_ops.set_value_( + set_value__60, + full_int_array_19, + full_int_array_20, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__60 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__62 = paddle._C_ops.set_value_( + set_value__61, + full_int_array_21, + full_int_array_22, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__61 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__63 = paddle._C_ops.set_value_( + set_value__62, + full_int_array_23, + full_int_array_24, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__62 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__64 = paddle._C_ops.set_value_( + set_value__63, + full_int_array_17, + full_int_array_2, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__63 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__65 = paddle._C_ops.set_value_( + set_value__64, + full_int_array_20, + full_int_array_25, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__64 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__66 = paddle._C_ops.set_value_( + set_value__65, + full_int_array_26, + full_int_array_27, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__65 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__67 = paddle._C_ops.set_value_( + set_value__66, + full_int_array_24, + full_int_array_28, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__66 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__6 = paddle._C_ops.set_value_( + set_value__67, + full_int_array_2, + full_int_array_29, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__67 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_290 = paddle._C_ops.reshape(set_value__6, full_int_array_42) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_152 = paddle._C_ops.transpose(reshape_290, [0, 1, 3, 2, 4, 5]) + del reshape_290 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_291 = paddle._C_ops.reshape(transpose_152, full_int_array_31) + del transpose_152 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_292 = paddle._C_ops.reshape(reshape_291, full_int_array_32) + del reshape_291 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_55 = paddle._C_ops.unsqueeze(reshape_292, full_int_array_8) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_56 = paddle._C_ops.unsqueeze(reshape_292, full_int_array_0) + del reshape_292 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_6 = paddle._C_ops.subtract(unsqueeze_55, unsqueeze_56) + del unsqueeze_55, unsqueeze_56 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_6 = paddle._C_ops.not_equal(subtract_6, full_35) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_12 = paddle._C_ops.where(not_equal_6, full_61, subtract_6) + del not_equal_6, subtract_6 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_6 = paddle._C_ops.equal(where_12, full_35) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_13 = paddle._C_ops.where(equal_6, full_62, where_12) + del equal_6, where_12 + + # pd_op.shape64: (3xi64) <- (-1x49x768xf32) + shape64_80 = paddle._C_ops.shape64(reshape_117) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_130 = paddle._C_ops.slice( + shape64_80, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_80 + + # pd_op.matmul: (-1x49x2304xf32) <- (-1x49x768xf32, 768x2304xf32) + matmul_67 = paddle._C_ops.matmul(reshape_117, parameter_136, False, False) + del parameter_136 + + # pd_op.add: (-1x49x2304xf32) <- (-1x49x2304xf32, 2304xf32) + add_91 = paddle._C_ops.add(matmul_67, parameter_135) + del parameter_135 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_111 = [slice_130, full_29, full_30, full_58, full_32] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_109 = paddle._C_ops.stack(combine_111, 0) + del combine_111 + + # pd_op.reshape: (-1x49x3x24x32xf32) <- (-1x49x2304xf32, 5xi64) + reshape_293 = paddle._C_ops.reshape(add_91, stack_109) + del stack_109 + + # pd_op.transpose: (3x-1x24x49x32xf32) <- (-1x49x3x24x32xf32) + transpose_80 = paddle._C_ops.transpose(reshape_293, [2, 0, 3, 1, 4]) + del reshape_293 + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_131 = paddle._C_ops.slice( + transpose_80, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_132 = paddle._C_ops.slice( + transpose_80, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_13 = paddle._C_ops.slice( + transpose_80, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x24x49x32xf32) <- (-1x24x49x32xf32, 1xf32) + scale_13 = paddle._C_ops.scale(slice_131, full_0, float("0"), True) + del slice_131 + + # pd_op.transpose: (-1x24x32x49xf32) <- (-1x24x49x32xf32) + transpose_81 = paddle._C_ops.transpose(slice_132, [0, 1, 3, 2]) + del slice_132 + + # pd_op.matmul: (-1x24x49x49xf32) <- (-1x24x49x32xf32, -1x24x32x49xf32) + matmul_68 = paddle._C_ops.matmul(scale_13, transpose_81, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_118 = paddle._C_ops.reshape(data_27, full_int_array_12) + del data_27 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_13 = paddle._C_ops.index_select(data_28, reshape_118, 0) + del data_28 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_294 = paddle._C_ops.reshape(index_select_13, full_int_array_13) + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_82 = paddle._C_ops.transpose(reshape_294, [2, 0, 1]) + del reshape_294 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_19 = paddle._C_ops.unsqueeze(transpose_82, full_int_array_7) + + # pd_op.add: (-1x24x49x49xf32) <- (-1x24x49x49xf32, 1x24x49x49xf32) + add_92 = paddle._C_ops.add(matmul_68, unsqueeze_19) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_6 = paddle._C_ops.floor_divide(slice_130, full_63) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_112 = [floor_divide_6, full_47, full_58, full_29, full_29] + del floor_divide_6 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_110 = paddle._C_ops.stack(combine_112, 0) + del combine_112 + + # pd_op.reshape: (-1x4x24x49x49xf32) <- (-1x24x49x49xf32, 5xi64) + reshape_119 = paddle._C_ops.reshape(add_92, stack_110) + del stack_110 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_57 = paddle._C_ops.unsqueeze(where_13, full_int_array_8) + del where_13 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_20 = paddle._C_ops.unsqueeze(unsqueeze_57, full_int_array_7) + del unsqueeze_57 + + # pd_op.add: (-1x4x24x49x49xf32) <- (-1x4x24x49x49xf32, 1x4x1x49x49xf32) + add_93 = paddle._C_ops.add(reshape_119, unsqueeze_20) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_113 = [slice_130, full_58, full_29, full_29] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_111 = paddle._C_ops.stack(combine_113, 0) + del combine_113 + + # pd_op.reshape: (-1x24x49x49xf32) <- (-1x4x24x49x49xf32, 4xi64) + reshape_295 = paddle._C_ops.reshape(add_93, stack_111) + del stack_111 + + # pd_op.softmax: (-1x24x49x49xf32) <- (-1x24x49x49xf32) + softmax_13 = paddle._C_ops.softmax(reshape_295, -1) + del reshape_295 + + # pd_op.matmul: (-1x24x49x32xf32) <- (-1x24x49x49xf32, -1x24x49x32xf32) + matmul_137 = paddle._C_ops.matmul(softmax_13, slice_13, False, False) + + # pd_op.transpose: (-1x49x24x32xf32) <- (-1x24x49x32xf32) + transpose_83 = paddle._C_ops.transpose(matmul_137, [0, 2, 1, 3]) + del matmul_137 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_114 = [slice_130, full_29, full_44] + del slice_130 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_112 = paddle._C_ops.stack(combine_114, 0) + del combine_114 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x49x24x32xf32, 3xi64) + reshape_120 = paddle._C_ops.reshape(transpose_83, stack_112) + del stack_112 + + # pd_op.matmul: (-1x49x768xf32) <- (-1x49x768xf32, 768x768xf32) + matmul_69 = paddle._C_ops.matmul(reshape_120, parameter_134, False, False) + del parameter_134 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, 768xf32) + add_94 = paddle._C_ops.add(matmul_69, parameter_133) + del parameter_133 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x49x768xf32, 4xi64) + reshape_121 = paddle._C_ops.reshape(add_94, full_int_array_38) + + # pd_op.reshape: (-1x2x2x7x7x768xf32) <- (-1x7x7x768xf32, 6xi64) + reshape_296 = paddle._C_ops.reshape(reshape_121, full_int_array_40) + + # pd_op.transpose: (-1x2x7x2x7x768xf32) <- (-1x2x2x7x7x768xf32) + transpose_84 = paddle._C_ops.transpose(reshape_296, [0, 1, 3, 2, 4, 5]) + del reshape_296 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x2x7x2x7x768xf32, 4xi64) + reshape_122 = paddle._C_ops.reshape(transpose_84, full_int_array_41) + + # pd_op.roll: (-1x14x14x768xf32) <- (-1x14x14x768xf32, 2xi64) + roll_13 = paddle._C_ops.roll(reshape_122, full_int_array_3, [1, 2]) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_115 = [slice_127, full_59, full_44] + del slice_127 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_113 = paddle._C_ops.stack(combine_115, 0) + del combine_115 + + # pd_op.reshape: (-1x196x768xf32) <- (-1x14x14x768xf32, 3xi64) + reshape_123 = paddle._C_ops.reshape(roll_13, stack_113) + del stack_113 + + # pd_op.full: (xf32) <- () + full_14 = paddle._C_ops.full( + [], + float("0.943478"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_155 = full_14 + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_81 = paddle._C_ops.shape64(reshape_123) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_133 = paddle._C_ops.slice( + shape64_81, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_81 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_116 = [slice_133, full_40, full_40] + del slice_133 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_114 = paddle._C_ops.stack(combine_116, 0) + del combine_116 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_24 = paddle._C_ops.uniform( + stack_114, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_114 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_201 = paddle._C_ops.add(full_14, uniform_24) + del uniform_24 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_24 = paddle._C_ops.floor(add_201) + del add_201 + + # pd_op.divide: (-1x196x768xf32) <- (-1x196x768xf32, xf32) + divide_24 = paddle._C_ops.divide(reshape_123, full_14) + + # pd_op.multiply: (-1x196x768xf32) <- (-1x196x768xf32, -1x1x1xf32) + multiply_24 = paddle._C_ops.multiply(divide_24, floor_24) + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_95 = paddle._C_ops.add(add_90, multiply_24) + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_90, layer_norm_91, layer_norm_92 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_95, parameter_132, parameter_131, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_131, parameter_132 + + # pd_op.matmul: (-1x196x3072xf32) <- (-1x196x768xf32, 768x3072xf32) + matmul_70 = paddle._C_ops.matmul(layer_norm_90, parameter_130, False, False) + del parameter_130 + + # pd_op.add: (-1x196x3072xf32) <- (-1x196x3072xf32, 3072xf32) + add_96 = paddle._C_ops.add(matmul_70, parameter_129) + del parameter_129 + + # pd_op.gelu: (-1x196x3072xf32) <- (-1x196x3072xf32) + gelu_13 = paddle._C_ops.gelu(add_96, False) + + # pd_op.matmul: (-1x196x768xf32) <- (-1x196x3072xf32, 3072x768xf32) + matmul_71 = paddle._C_ops.matmul(gelu_13, parameter_128, False, False) + del parameter_128 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, 768xf32) + add_97 = paddle._C_ops.add(matmul_71, parameter_127) + del parameter_127 + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_82 = paddle._C_ops.shape64(add_97) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_134 = paddle._C_ops.slice( + shape64_82, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_82 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_117 = [slice_134, full_40, full_40] + del slice_134 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_115 = paddle._C_ops.stack(combine_117, 0) + del combine_117 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_25 = paddle._C_ops.uniform( + stack_115, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_115 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_202 = paddle._C_ops.add(full_14, uniform_25) + del uniform_25 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_25 = paddle._C_ops.floor(add_202) + del add_202 + + # pd_op.divide: (-1x196x768xf32) <- (-1x196x768xf32, xf32) + divide_25 = paddle._C_ops.divide(add_97, full_14) + + # pd_op.multiply: (-1x196x768xf32) <- (-1x196x768xf32, -1x1x1xf32) + multiply_25 = paddle._C_ops.multiply(divide_25, floor_25) + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_98 = paddle._C_ops.add(add_95, multiply_25) + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_83 = paddle._C_ops.shape64(add_98) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_135 = paddle._C_ops.slice( + shape64_83, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_83 + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_93, layer_norm_94, layer_norm_95 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_98, parameter_126, parameter_125, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_125, parameter_126 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_118 = [slice_135, full_56, full_56, full_44] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_116 = paddle._C_ops.stack(combine_118, 0) + del combine_118 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x196x768xf32, 4xi64) + reshape_124 = paddle._C_ops.reshape(layer_norm_93, stack_116) + del stack_116 + + # pd_op.shape64: (4xi64) <- (-1x14x14x768xf32) + shape64_84 = paddle._C_ops.shape64(reshape_124) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_136 = paddle._C_ops.slice( + shape64_84, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_84 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_119 = [slice_136, full_57, full_28, full_57, full_28, full_44] + del slice_136 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_117 = paddle._C_ops.stack(combine_119, 0) + del combine_119 + + # pd_op.reshape: (-1x2x7x2x7x768xf32) <- (-1x14x14x768xf32, 6xi64) + reshape_297 = paddle._C_ops.reshape(reshape_124, stack_117) + del stack_117 + + # pd_op.transpose: (-1x2x2x7x7x768xf32) <- (-1x2x7x2x7x768xf32) + transpose_85 = paddle._C_ops.transpose(reshape_297, [0, 1, 3, 2, 4, 5]) + del reshape_297 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x2x2x7x7x768xf32, 4xi64) + reshape_125 = paddle._C_ops.reshape(transpose_85, full_int_array_38) + + # pd_op.reshape: (-1x49x768xf32) <- (-1x7x7x768xf32, 3xi64) + reshape_126 = paddle._C_ops.reshape(reshape_125, full_int_array_39) + + # pd_op.shape64: (3xi64) <- (-1x49x768xf32) + shape64_85 = paddle._C_ops.shape64(reshape_126) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_137 = paddle._C_ops.slice( + shape64_85, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_85 + + # pd_op.matmul: (-1x49x2304xf32) <- (-1x49x768xf32, 768x2304xf32) + matmul_72 = paddle._C_ops.matmul(reshape_126, parameter_124, False, False) + del parameter_124 + + # pd_op.add: (-1x49x2304xf32) <- (-1x49x2304xf32, 2304xf32) + add_99 = paddle._C_ops.add(matmul_72, parameter_123) + del parameter_123 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_120 = [slice_137, full_29, full_30, full_58, full_32] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_118 = paddle._C_ops.stack(combine_120, 0) + del combine_120 + + # pd_op.reshape: (-1x49x3x24x32xf32) <- (-1x49x2304xf32, 5xi64) + reshape_298 = paddle._C_ops.reshape(add_99, stack_118) + del stack_118 + + # pd_op.transpose: (3x-1x24x49x32xf32) <- (-1x49x3x24x32xf32) + transpose_86 = paddle._C_ops.transpose(reshape_298, [2, 0, 3, 1, 4]) + del reshape_298 + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_138 = paddle._C_ops.slice( + transpose_86, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_139 = paddle._C_ops.slice( + transpose_86, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_14 = paddle._C_ops.slice( + transpose_86, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x24x49x32xf32) <- (-1x24x49x32xf32, 1xf32) + scale_14 = paddle._C_ops.scale(slice_138, full_0, float("0"), True) + del slice_138 + + # pd_op.transpose: (-1x24x32x49xf32) <- (-1x24x49x32xf32) + transpose_87 = paddle._C_ops.transpose(slice_139, [0, 1, 3, 2]) + del slice_139 + + # pd_op.matmul: (-1x24x49x49xf32) <- (-1x24x49x32xf32, -1x24x32x49xf32) + matmul_73 = paddle._C_ops.matmul(scale_14, transpose_87, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_127 = paddle._C_ops.reshape(data_29, full_int_array_12) + del data_29 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_14 = paddle._C_ops.index_select(data_30, reshape_127, 0) + del data_30 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_299 = paddle._C_ops.reshape(index_select_14, full_int_array_13) + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_88 = paddle._C_ops.transpose(reshape_299, [2, 0, 1]) + del reshape_299 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_21 = paddle._C_ops.unsqueeze(transpose_88, full_int_array_7) + + # pd_op.add: (-1x24x49x49xf32) <- (-1x24x49x49xf32, 1x24x49x49xf32) + add_203 = paddle._C_ops.add(matmul_73, unsqueeze_21) + + # pd_op.softmax: (-1x24x49x49xf32) <- (-1x24x49x49xf32) + softmax_14 = paddle._C_ops.softmax(add_203, -1) + del add_203 + + # pd_op.matmul: (-1x24x49x32xf32) <- (-1x24x49x49xf32, -1x24x49x32xf32) + matmul_138 = paddle._C_ops.matmul(softmax_14, slice_14, False, False) + + # pd_op.transpose: (-1x49x24x32xf32) <- (-1x24x49x32xf32) + transpose_89 = paddle._C_ops.transpose(matmul_138, [0, 2, 1, 3]) + del matmul_138 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_121 = [slice_137, full_29, full_44] + del slice_137 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_119 = paddle._C_ops.stack(combine_121, 0) + del combine_121 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x49x24x32xf32, 3xi64) + reshape_128 = paddle._C_ops.reshape(transpose_89, stack_119) + del stack_119 + + # pd_op.matmul: (-1x49x768xf32) <- (-1x49x768xf32, 768x768xf32) + matmul_74 = paddle._C_ops.matmul(reshape_128, parameter_122, False, False) + del parameter_122 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, 768xf32) + add_100 = paddle._C_ops.add(matmul_74, parameter_121) + del parameter_121 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x49x768xf32, 4xi64) + reshape_129 = paddle._C_ops.reshape(add_100, full_int_array_38) + + # pd_op.reshape: (-1x2x2x7x7x768xf32) <- (-1x7x7x768xf32, 6xi64) + reshape_300 = paddle._C_ops.reshape(reshape_129, full_int_array_40) + + # pd_op.transpose: (-1x2x7x2x7x768xf32) <- (-1x2x2x7x7x768xf32) + transpose_90 = paddle._C_ops.transpose(reshape_300, [0, 1, 3, 2, 4, 5]) + del reshape_300 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x2x7x2x7x768xf32, 4xi64) + reshape_130 = paddle._C_ops.reshape(transpose_90, full_int_array_41) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_122 = [slice_135, full_59, full_44] + del slice_135 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_120 = paddle._C_ops.stack(combine_122, 0) + del combine_122 + + # pd_op.reshape: (-1x196x768xf32) <- (-1x14x14x768xf32, 3xi64) + reshape_131 = paddle._C_ops.reshape(reshape_130, stack_120) + del stack_120 + + # pd_op.full: (xf32) <- () + full_15 = paddle._C_ops.full( + [], + float("0.93913"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_164 = full_15 + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_86 = paddle._C_ops.shape64(reshape_131) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_140 = paddle._C_ops.slice( + shape64_86, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_86 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_123 = [slice_140, full_40, full_40] + del slice_140 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_121 = paddle._C_ops.stack(combine_123, 0) + del combine_123 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_26 = paddle._C_ops.uniform( + stack_121, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_121 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_204 = paddle._C_ops.add(full_15, uniform_26) + del uniform_26 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_26 = paddle._C_ops.floor(add_204) + del add_204 + + # pd_op.divide: (-1x196x768xf32) <- (-1x196x768xf32, xf32) + divide_26 = paddle._C_ops.divide(reshape_131, full_15) + + # pd_op.multiply: (-1x196x768xf32) <- (-1x196x768xf32, -1x1x1xf32) + multiply_26 = paddle._C_ops.multiply(divide_26, floor_26) + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_101 = paddle._C_ops.add(add_98, multiply_26) + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_96, layer_norm_97, layer_norm_98 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_101, parameter_120, parameter_119, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_119, parameter_120 + + # pd_op.matmul: (-1x196x3072xf32) <- (-1x196x768xf32, 768x3072xf32) + matmul_75 = paddle._C_ops.matmul(layer_norm_96, parameter_118, False, False) + del parameter_118 + + # pd_op.add: (-1x196x3072xf32) <- (-1x196x3072xf32, 3072xf32) + add_102 = paddle._C_ops.add(matmul_75, parameter_117) + del parameter_117 + + # pd_op.gelu: (-1x196x3072xf32) <- (-1x196x3072xf32) + gelu_14 = paddle._C_ops.gelu(add_102, False) + + # pd_op.matmul: (-1x196x768xf32) <- (-1x196x3072xf32, 3072x768xf32) + matmul_76 = paddle._C_ops.matmul(gelu_14, parameter_116, False, False) + del parameter_116 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, 768xf32) + add_103 = paddle._C_ops.add(matmul_76, parameter_115) + del parameter_115 + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_87 = paddle._C_ops.shape64(add_103) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_141 = paddle._C_ops.slice( + shape64_87, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_87 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_124 = [slice_141, full_40, full_40] + del slice_141 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_122 = paddle._C_ops.stack(combine_124, 0) + del combine_124 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_27 = paddle._C_ops.uniform( + stack_122, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_122 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_205 = paddle._C_ops.add(full_15, uniform_27) + del uniform_27 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_27 = paddle._C_ops.floor(add_205) + del add_205 + + # pd_op.divide: (-1x196x768xf32) <- (-1x196x768xf32, xf32) + divide_27 = paddle._C_ops.divide(add_103, full_15) + + # pd_op.multiply: (-1x196x768xf32) <- (-1x196x768xf32, -1x1x1xf32) + multiply_27 = paddle._C_ops.multiply(divide_27, floor_27) + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_104 = paddle._C_ops.add(add_101, multiply_27) + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_88 = paddle._C_ops.shape64(add_104) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_142 = paddle._C_ops.slice( + shape64_88, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_88 + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_99, layer_norm_100, layer_norm_101 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_104, parameter_114, parameter_113, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_113, parameter_114 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_125 = [slice_142, full_56, full_56, full_44] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_123 = paddle._C_ops.stack(combine_125, 0) + del combine_125 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x196x768xf32, 4xi64) + reshape_132 = paddle._C_ops.reshape(layer_norm_99, stack_123) + del stack_123 + + # pd_op.shape64: (4xi64) <- (-1x14x14x768xf32) + shape64_89 = paddle._C_ops.shape64(reshape_132) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_143 = paddle._C_ops.slice( + shape64_89, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_89 + + # pd_op.roll: (-1x14x14x768xf32) <- (-1x14x14x768xf32, 2xi64) + roll_14 = paddle._C_ops.roll(reshape_132, full_int_array_2, [1, 2]) + + # pd_op.shape64: (4xi64) <- (-1x14x14x768xf32) + shape64_90 = paddle._C_ops.shape64(roll_14) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_144 = paddle._C_ops.slice( + shape64_90, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_90 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_126 = [slice_144, full_57, full_28, full_57, full_28, full_44] + del slice_144 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_124 = paddle._C_ops.stack(combine_126, 0) + del combine_126 + + # pd_op.reshape: (-1x2x7x2x7x768xf32) <- (-1x14x14x768xf32, 6xi64) + reshape_301 = paddle._C_ops.reshape(roll_14, stack_124) + del stack_124 + + # pd_op.transpose: (-1x2x2x7x7x768xf32) <- (-1x2x7x2x7x768xf32) + transpose_91 = paddle._C_ops.transpose(reshape_301, [0, 1, 3, 2, 4, 5]) + del reshape_301 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x2x2x7x7x768xf32, 4xi64) + reshape_133 = paddle._C_ops.reshape(transpose_91, full_int_array_38) + + # pd_op.reshape: (-1x49x768xf32) <- (-1x7x7x768xf32, 3xi64) + reshape_134 = paddle._C_ops.reshape(reshape_133, full_int_array_39) + + # pd_op.full: (1x14x14x1xf32) <- () + full_68 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__68 = paddle._C_ops.set_value_( + full_68, + full_int_array_16, + full_int_array_17, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_68 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__69 = paddle._C_ops.set_value_( + set_value__68, + full_int_array_19, + full_int_array_20, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__68 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__70 = paddle._C_ops.set_value_( + set_value__69, + full_int_array_21, + full_int_array_22, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__69 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__71 = paddle._C_ops.set_value_( + set_value__70, + full_int_array_23, + full_int_array_24, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__70 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__72 = paddle._C_ops.set_value_( + set_value__71, + full_int_array_17, + full_int_array_2, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__71 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__73 = paddle._C_ops.set_value_( + set_value__72, + full_int_array_20, + full_int_array_25, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__72 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__74 = paddle._C_ops.set_value_( + set_value__73, + full_int_array_26, + full_int_array_27, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__73 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__75 = paddle._C_ops.set_value_( + set_value__74, + full_int_array_24, + full_int_array_28, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__74 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__7 = paddle._C_ops.set_value_( + set_value__75, + full_int_array_2, + full_int_array_29, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__75 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_302 = paddle._C_ops.reshape(set_value__7, full_int_array_42) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_153 = paddle._C_ops.transpose(reshape_302, [0, 1, 3, 2, 4, 5]) + del reshape_302 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_303 = paddle._C_ops.reshape(transpose_153, full_int_array_31) + del transpose_153 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_304 = paddle._C_ops.reshape(reshape_303, full_int_array_32) + del reshape_303 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_58 = paddle._C_ops.unsqueeze(reshape_304, full_int_array_8) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_59 = paddle._C_ops.unsqueeze(reshape_304, full_int_array_0) + del reshape_304 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_7 = paddle._C_ops.subtract(unsqueeze_58, unsqueeze_59) + del unsqueeze_58, unsqueeze_59 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_7 = paddle._C_ops.not_equal(subtract_7, full_35) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_14 = paddle._C_ops.where(not_equal_7, full_61, subtract_7) + del not_equal_7, subtract_7 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_7 = paddle._C_ops.equal(where_14, full_35) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_15 = paddle._C_ops.where(equal_7, full_62, where_14) + del equal_7, where_14 + + # pd_op.shape64: (3xi64) <- (-1x49x768xf32) + shape64_91 = paddle._C_ops.shape64(reshape_134) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_145 = paddle._C_ops.slice( + shape64_91, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_91 + + # pd_op.matmul: (-1x49x2304xf32) <- (-1x49x768xf32, 768x2304xf32) + matmul_77 = paddle._C_ops.matmul(reshape_134, parameter_112, False, False) + del parameter_112 + + # pd_op.add: (-1x49x2304xf32) <- (-1x49x2304xf32, 2304xf32) + add_105 = paddle._C_ops.add(matmul_77, parameter_111) + del parameter_111 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_127 = [slice_145, full_29, full_30, full_58, full_32] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_125 = paddle._C_ops.stack(combine_127, 0) + del combine_127 + + # pd_op.reshape: (-1x49x3x24x32xf32) <- (-1x49x2304xf32, 5xi64) + reshape_305 = paddle._C_ops.reshape(add_105, stack_125) + del stack_125 + + # pd_op.transpose: (3x-1x24x49x32xf32) <- (-1x49x3x24x32xf32) + transpose_92 = paddle._C_ops.transpose(reshape_305, [2, 0, 3, 1, 4]) + del reshape_305 + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_146 = paddle._C_ops.slice( + transpose_92, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_147 = paddle._C_ops.slice( + transpose_92, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_15 = paddle._C_ops.slice( + transpose_92, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x24x49x32xf32) <- (-1x24x49x32xf32, 1xf32) + scale_15 = paddle._C_ops.scale(slice_146, full_0, float("0"), True) + del slice_146 + + # pd_op.transpose: (-1x24x32x49xf32) <- (-1x24x49x32xf32) + transpose_93 = paddle._C_ops.transpose(slice_147, [0, 1, 3, 2]) + del slice_147 + + # pd_op.matmul: (-1x24x49x49xf32) <- (-1x24x49x32xf32, -1x24x32x49xf32) + matmul_78 = paddle._C_ops.matmul(scale_15, transpose_93, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_135 = paddle._C_ops.reshape(data_31, full_int_array_12) + del data_31 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_15 = paddle._C_ops.index_select(data_32, reshape_135, 0) + del data_32 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_306 = paddle._C_ops.reshape(index_select_15, full_int_array_13) + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_94 = paddle._C_ops.transpose(reshape_306, [2, 0, 1]) + del reshape_306 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_22 = paddle._C_ops.unsqueeze(transpose_94, full_int_array_7) + + # pd_op.add: (-1x24x49x49xf32) <- (-1x24x49x49xf32, 1x24x49x49xf32) + add_106 = paddle._C_ops.add(matmul_78, unsqueeze_22) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_7 = paddle._C_ops.floor_divide(slice_145, full_63) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_128 = [floor_divide_7, full_47, full_58, full_29, full_29] + del floor_divide_7 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_126 = paddle._C_ops.stack(combine_128, 0) + del combine_128 + + # pd_op.reshape: (-1x4x24x49x49xf32) <- (-1x24x49x49xf32, 5xi64) + reshape_136 = paddle._C_ops.reshape(add_106, stack_126) + del stack_126 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_60 = paddle._C_ops.unsqueeze(where_15, full_int_array_8) + del where_15 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_23 = paddle._C_ops.unsqueeze(unsqueeze_60, full_int_array_7) + del unsqueeze_60 + + # pd_op.add: (-1x4x24x49x49xf32) <- (-1x4x24x49x49xf32, 1x4x1x49x49xf32) + add_107 = paddle._C_ops.add(reshape_136, unsqueeze_23) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_129 = [slice_145, full_58, full_29, full_29] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_127 = paddle._C_ops.stack(combine_129, 0) + del combine_129 + + # pd_op.reshape: (-1x24x49x49xf32) <- (-1x4x24x49x49xf32, 4xi64) + reshape_307 = paddle._C_ops.reshape(add_107, stack_127) + del stack_127 + + # pd_op.softmax: (-1x24x49x49xf32) <- (-1x24x49x49xf32) + softmax_15 = paddle._C_ops.softmax(reshape_307, -1) + del reshape_307 + + # pd_op.matmul: (-1x24x49x32xf32) <- (-1x24x49x49xf32, -1x24x49x32xf32) + matmul_139 = paddle._C_ops.matmul(softmax_15, slice_15, False, False) + + # pd_op.transpose: (-1x49x24x32xf32) <- (-1x24x49x32xf32) + transpose_95 = paddle._C_ops.transpose(matmul_139, [0, 2, 1, 3]) + del matmul_139 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_130 = [slice_145, full_29, full_44] + del slice_145 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_128 = paddle._C_ops.stack(combine_130, 0) + del combine_130 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x49x24x32xf32, 3xi64) + reshape_137 = paddle._C_ops.reshape(transpose_95, stack_128) + del stack_128 + + # pd_op.matmul: (-1x49x768xf32) <- (-1x49x768xf32, 768x768xf32) + matmul_79 = paddle._C_ops.matmul(reshape_137, parameter_110, False, False) + del parameter_110 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, 768xf32) + add_108 = paddle._C_ops.add(matmul_79, parameter_109) + del parameter_109 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x49x768xf32, 4xi64) + reshape_138 = paddle._C_ops.reshape(add_108, full_int_array_38) + + # pd_op.reshape: (-1x2x2x7x7x768xf32) <- (-1x7x7x768xf32, 6xi64) + reshape_308 = paddle._C_ops.reshape(reshape_138, full_int_array_40) + + # pd_op.transpose: (-1x2x7x2x7x768xf32) <- (-1x2x2x7x7x768xf32) + transpose_96 = paddle._C_ops.transpose(reshape_308, [0, 1, 3, 2, 4, 5]) + del reshape_308 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x2x7x2x7x768xf32, 4xi64) + reshape_139 = paddle._C_ops.reshape(transpose_96, full_int_array_41) + + # pd_op.roll: (-1x14x14x768xf32) <- (-1x14x14x768xf32, 2xi64) + roll_15 = paddle._C_ops.roll(reshape_139, full_int_array_3, [1, 2]) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_131 = [slice_142, full_59, full_44] + del slice_142 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_129 = paddle._C_ops.stack(combine_131, 0) + del combine_131 + + # pd_op.reshape: (-1x196x768xf32) <- (-1x14x14x768xf32, 3xi64) + reshape_140 = paddle._C_ops.reshape(roll_15, stack_129) + del stack_129 + + # pd_op.full: (xf32) <- () + full_16 = paddle._C_ops.full( + [], + float("0.934783"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_175 = full_16 + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_92 = paddle._C_ops.shape64(reshape_140) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_148 = paddle._C_ops.slice( + shape64_92, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_92 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_132 = [slice_148, full_40, full_40] + del slice_148 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_130 = paddle._C_ops.stack(combine_132, 0) + del combine_132 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_28 = paddle._C_ops.uniform( + stack_130, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_130 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_206 = paddle._C_ops.add(full_16, uniform_28) + del uniform_28 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_28 = paddle._C_ops.floor(add_206) + del add_206 + + # pd_op.divide: (-1x196x768xf32) <- (-1x196x768xf32, xf32) + divide_28 = paddle._C_ops.divide(reshape_140, full_16) + + # pd_op.multiply: (-1x196x768xf32) <- (-1x196x768xf32, -1x1x1xf32) + multiply_28 = paddle._C_ops.multiply(divide_28, floor_28) + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_109 = paddle._C_ops.add(add_104, multiply_28) + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_102, layer_norm_103, layer_norm_104 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_109, parameter_108, parameter_107, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_107, parameter_108 + + # pd_op.matmul: (-1x196x3072xf32) <- (-1x196x768xf32, 768x3072xf32) + matmul_80 = paddle._C_ops.matmul(layer_norm_102, parameter_106, False, False) + del parameter_106 + + # pd_op.add: (-1x196x3072xf32) <- (-1x196x3072xf32, 3072xf32) + add_110 = paddle._C_ops.add(matmul_80, parameter_105) + del parameter_105 + + # pd_op.gelu: (-1x196x3072xf32) <- (-1x196x3072xf32) + gelu_15 = paddle._C_ops.gelu(add_110, False) + + # pd_op.matmul: (-1x196x768xf32) <- (-1x196x3072xf32, 3072x768xf32) + matmul_81 = paddle._C_ops.matmul(gelu_15, parameter_104, False, False) + del parameter_104 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, 768xf32) + add_111 = paddle._C_ops.add(matmul_81, parameter_103) + del parameter_103 + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_93 = paddle._C_ops.shape64(add_111) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_149 = paddle._C_ops.slice( + shape64_93, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_93 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_133 = [slice_149, full_40, full_40] + del slice_149 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_131 = paddle._C_ops.stack(combine_133, 0) + del combine_133 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_29 = paddle._C_ops.uniform( + stack_131, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_131 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_207 = paddle._C_ops.add(full_16, uniform_29) + del uniform_29 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_29 = paddle._C_ops.floor(add_207) + del add_207 + + # pd_op.divide: (-1x196x768xf32) <- (-1x196x768xf32, xf32) + divide_29 = paddle._C_ops.divide(add_111, full_16) + + # pd_op.multiply: (-1x196x768xf32) <- (-1x196x768xf32, -1x1x1xf32) + multiply_29 = paddle._C_ops.multiply(divide_29, floor_29) + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_112 = paddle._C_ops.add(add_109, multiply_29) + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_94 = paddle._C_ops.shape64(add_112) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_150 = paddle._C_ops.slice( + shape64_94, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_94 + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_105, layer_norm_106, layer_norm_107 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_112, parameter_102, parameter_101, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_101, parameter_102 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_134 = [slice_150, full_56, full_56, full_44] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_132 = paddle._C_ops.stack(combine_134, 0) + del combine_134 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x196x768xf32, 4xi64) + reshape_141 = paddle._C_ops.reshape(layer_norm_105, stack_132) + del stack_132 + + # pd_op.shape64: (4xi64) <- (-1x14x14x768xf32) + shape64_95 = paddle._C_ops.shape64(reshape_141) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_151 = paddle._C_ops.slice( + shape64_95, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_95 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_135 = [slice_151, full_57, full_28, full_57, full_28, full_44] + del slice_151 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_133 = paddle._C_ops.stack(combine_135, 0) + del combine_135 + + # pd_op.reshape: (-1x2x7x2x7x768xf32) <- (-1x14x14x768xf32, 6xi64) + reshape_309 = paddle._C_ops.reshape(reshape_141, stack_133) + del stack_133 + + # pd_op.transpose: (-1x2x2x7x7x768xf32) <- (-1x2x7x2x7x768xf32) + transpose_97 = paddle._C_ops.transpose(reshape_309, [0, 1, 3, 2, 4, 5]) + del reshape_309 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x2x2x7x7x768xf32, 4xi64) + reshape_142 = paddle._C_ops.reshape(transpose_97, full_int_array_38) + + # pd_op.reshape: (-1x49x768xf32) <- (-1x7x7x768xf32, 3xi64) + reshape_143 = paddle._C_ops.reshape(reshape_142, full_int_array_39) + + # pd_op.shape64: (3xi64) <- (-1x49x768xf32) + shape64_96 = paddle._C_ops.shape64(reshape_143) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_152 = paddle._C_ops.slice( + shape64_96, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_96 + + # pd_op.matmul: (-1x49x2304xf32) <- (-1x49x768xf32, 768x2304xf32) + matmul_82 = paddle._C_ops.matmul(reshape_143, parameter_100, False, False) + del parameter_100 + + # pd_op.add: (-1x49x2304xf32) <- (-1x49x2304xf32, 2304xf32) + add_113 = paddle._C_ops.add(matmul_82, parameter_99) + del parameter_99 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_136 = [slice_152, full_29, full_30, full_58, full_32] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_134 = paddle._C_ops.stack(combine_136, 0) + del combine_136 + + # pd_op.reshape: (-1x49x3x24x32xf32) <- (-1x49x2304xf32, 5xi64) + reshape_310 = paddle._C_ops.reshape(add_113, stack_134) + del stack_134 + + # pd_op.transpose: (3x-1x24x49x32xf32) <- (-1x49x3x24x32xf32) + transpose_98 = paddle._C_ops.transpose(reshape_310, [2, 0, 3, 1, 4]) + del reshape_310 + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_153 = paddle._C_ops.slice( + transpose_98, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_154 = paddle._C_ops.slice( + transpose_98, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_16 = paddle._C_ops.slice( + transpose_98, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x24x49x32xf32) <- (-1x24x49x32xf32, 1xf32) + scale_16 = paddle._C_ops.scale(slice_153, full_0, float("0"), True) + del slice_153 + + # pd_op.transpose: (-1x24x32x49xf32) <- (-1x24x49x32xf32) + transpose_99 = paddle._C_ops.transpose(slice_154, [0, 1, 3, 2]) + del slice_154 + + # pd_op.matmul: (-1x24x49x49xf32) <- (-1x24x49x32xf32, -1x24x32x49xf32) + matmul_83 = paddle._C_ops.matmul(scale_16, transpose_99, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_144 = paddle._C_ops.reshape(data_33, full_int_array_12) + del data_33 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_16 = paddle._C_ops.index_select(data_34, reshape_144, 0) + del data_34 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_311 = paddle._C_ops.reshape(index_select_16, full_int_array_13) + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_100 = paddle._C_ops.transpose(reshape_311, [2, 0, 1]) + del reshape_311 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_24 = paddle._C_ops.unsqueeze(transpose_100, full_int_array_7) + + # pd_op.add: (-1x24x49x49xf32) <- (-1x24x49x49xf32, 1x24x49x49xf32) + add_208 = paddle._C_ops.add(matmul_83, unsqueeze_24) + + # pd_op.softmax: (-1x24x49x49xf32) <- (-1x24x49x49xf32) + softmax_16 = paddle._C_ops.softmax(add_208, -1) + del add_208 + + # pd_op.matmul: (-1x24x49x32xf32) <- (-1x24x49x49xf32, -1x24x49x32xf32) + matmul_140 = paddle._C_ops.matmul(softmax_16, slice_16, False, False) + + # pd_op.transpose: (-1x49x24x32xf32) <- (-1x24x49x32xf32) + transpose_101 = paddle._C_ops.transpose(matmul_140, [0, 2, 1, 3]) + del matmul_140 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_137 = [slice_152, full_29, full_44] + del slice_152 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_135 = paddle._C_ops.stack(combine_137, 0) + del combine_137 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x49x24x32xf32, 3xi64) + reshape_145 = paddle._C_ops.reshape(transpose_101, stack_135) + del stack_135 + + # pd_op.matmul: (-1x49x768xf32) <- (-1x49x768xf32, 768x768xf32) + matmul_84 = paddle._C_ops.matmul(reshape_145, parameter_98, False, False) + del parameter_98 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, 768xf32) + add_114 = paddle._C_ops.add(matmul_84, parameter_97) + del parameter_97 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x49x768xf32, 4xi64) + reshape_146 = paddle._C_ops.reshape(add_114, full_int_array_38) + + # pd_op.reshape: (-1x2x2x7x7x768xf32) <- (-1x7x7x768xf32, 6xi64) + reshape_312 = paddle._C_ops.reshape(reshape_146, full_int_array_40) + + # pd_op.transpose: (-1x2x7x2x7x768xf32) <- (-1x2x2x7x7x768xf32) + transpose_102 = paddle._C_ops.transpose(reshape_312, [0, 1, 3, 2, 4, 5]) + del reshape_312 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x2x7x2x7x768xf32, 4xi64) + reshape_147 = paddle._C_ops.reshape(transpose_102, full_int_array_41) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_138 = [slice_150, full_59, full_44] + del slice_150 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_136 = paddle._C_ops.stack(combine_138, 0) + del combine_138 + + # pd_op.reshape: (-1x196x768xf32) <- (-1x14x14x768xf32, 3xi64) + reshape_148 = paddle._C_ops.reshape(reshape_147, stack_136) + del stack_136 + + # pd_op.full: (xf32) <- () + full_17 = paddle._C_ops.full( + [], + float("0.930435"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_184 = full_17 + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_97 = paddle._C_ops.shape64(reshape_148) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_155 = paddle._C_ops.slice( + shape64_97, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_97 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_139 = [slice_155, full_40, full_40] + del slice_155 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_137 = paddle._C_ops.stack(combine_139, 0) + del combine_139 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_30 = paddle._C_ops.uniform( + stack_137, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_137 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_209 = paddle._C_ops.add(full_17, uniform_30) + del uniform_30 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_30 = paddle._C_ops.floor(add_209) + del add_209 + + # pd_op.divide: (-1x196x768xf32) <- (-1x196x768xf32, xf32) + divide_30 = paddle._C_ops.divide(reshape_148, full_17) + + # pd_op.multiply: (-1x196x768xf32) <- (-1x196x768xf32, -1x1x1xf32) + multiply_30 = paddle._C_ops.multiply(divide_30, floor_30) + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_115 = paddle._C_ops.add(add_112, multiply_30) + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_108, layer_norm_109, layer_norm_110 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_115, parameter_96, parameter_95, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_95, parameter_96 + + # pd_op.matmul: (-1x196x3072xf32) <- (-1x196x768xf32, 768x3072xf32) + matmul_85 = paddle._C_ops.matmul(layer_norm_108, parameter_94, False, False) + del parameter_94 + + # pd_op.add: (-1x196x3072xf32) <- (-1x196x3072xf32, 3072xf32) + add_116 = paddle._C_ops.add(matmul_85, parameter_93) + del parameter_93 + + # pd_op.gelu: (-1x196x3072xf32) <- (-1x196x3072xf32) + gelu_16 = paddle._C_ops.gelu(add_116, False) + + # pd_op.matmul: (-1x196x768xf32) <- (-1x196x3072xf32, 3072x768xf32) + matmul_86 = paddle._C_ops.matmul(gelu_16, parameter_92, False, False) + del parameter_92 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, 768xf32) + add_117 = paddle._C_ops.add(matmul_86, parameter_91) + del parameter_91 + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_98 = paddle._C_ops.shape64(add_117) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_156 = paddle._C_ops.slice( + shape64_98, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_98 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_140 = [slice_156, full_40, full_40] + del slice_156 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_138 = paddle._C_ops.stack(combine_140, 0) + del combine_140 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_31 = paddle._C_ops.uniform( + stack_138, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_138 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_210 = paddle._C_ops.add(full_17, uniform_31) + del uniform_31 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_31 = paddle._C_ops.floor(add_210) + del add_210 + + # pd_op.divide: (-1x196x768xf32) <- (-1x196x768xf32, xf32) + divide_31 = paddle._C_ops.divide(add_117, full_17) + + # pd_op.multiply: (-1x196x768xf32) <- (-1x196x768xf32, -1x1x1xf32) + multiply_31 = paddle._C_ops.multiply(divide_31, floor_31) + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_118 = paddle._C_ops.add(add_115, multiply_31) + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_99 = paddle._C_ops.shape64(add_118) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_157 = paddle._C_ops.slice( + shape64_99, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_99 + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_111, layer_norm_112, layer_norm_113 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_118, parameter_90, parameter_89, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_89, parameter_90 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_141 = [slice_157, full_56, full_56, full_44] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_139 = paddle._C_ops.stack(combine_141, 0) + del combine_141 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x196x768xf32, 4xi64) + reshape_149 = paddle._C_ops.reshape(layer_norm_111, stack_139) + del stack_139 + + # pd_op.shape64: (4xi64) <- (-1x14x14x768xf32) + shape64_100 = paddle._C_ops.shape64(reshape_149) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_158 = paddle._C_ops.slice( + shape64_100, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_100 + + # pd_op.roll: (-1x14x14x768xf32) <- (-1x14x14x768xf32, 2xi64) + roll_16 = paddle._C_ops.roll(reshape_149, full_int_array_2, [1, 2]) + + # pd_op.shape64: (4xi64) <- (-1x14x14x768xf32) + shape64_101 = paddle._C_ops.shape64(roll_16) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_159 = paddle._C_ops.slice( + shape64_101, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_101 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_142 = [slice_159, full_57, full_28, full_57, full_28, full_44] + del slice_159 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_140 = paddle._C_ops.stack(combine_142, 0) + del combine_142 + + # pd_op.reshape: (-1x2x7x2x7x768xf32) <- (-1x14x14x768xf32, 6xi64) + reshape_313 = paddle._C_ops.reshape(roll_16, stack_140) + del stack_140 + + # pd_op.transpose: (-1x2x2x7x7x768xf32) <- (-1x2x7x2x7x768xf32) + transpose_103 = paddle._C_ops.transpose(reshape_313, [0, 1, 3, 2, 4, 5]) + del reshape_313 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x2x2x7x7x768xf32, 4xi64) + reshape_150 = paddle._C_ops.reshape(transpose_103, full_int_array_38) + + # pd_op.reshape: (-1x49x768xf32) <- (-1x7x7x768xf32, 3xi64) + reshape_151 = paddle._C_ops.reshape(reshape_150, full_int_array_39) + + # pd_op.full: (1x14x14x1xf32) <- () + full_69 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__76 = paddle._C_ops.set_value_( + full_69, + full_int_array_16, + full_int_array_17, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_69 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__77 = paddle._C_ops.set_value_( + set_value__76, + full_int_array_19, + full_int_array_20, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__76 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__78 = paddle._C_ops.set_value_( + set_value__77, + full_int_array_21, + full_int_array_22, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__77 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__79 = paddle._C_ops.set_value_( + set_value__78, + full_int_array_23, + full_int_array_24, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__78 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__80 = paddle._C_ops.set_value_( + set_value__79, + full_int_array_17, + full_int_array_2, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__79 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__81 = paddle._C_ops.set_value_( + set_value__80, + full_int_array_20, + full_int_array_25, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__80 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__82 = paddle._C_ops.set_value_( + set_value__81, + full_int_array_26, + full_int_array_27, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__81 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__83 = paddle._C_ops.set_value_( + set_value__82, + full_int_array_24, + full_int_array_28, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__82 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__8 = paddle._C_ops.set_value_( + set_value__83, + full_int_array_2, + full_int_array_29, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__83 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_314 = paddle._C_ops.reshape(set_value__8, full_int_array_42) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_154 = paddle._C_ops.transpose(reshape_314, [0, 1, 3, 2, 4, 5]) + del reshape_314 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_315 = paddle._C_ops.reshape(transpose_154, full_int_array_31) + del transpose_154 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_316 = paddle._C_ops.reshape(reshape_315, full_int_array_32) + del reshape_315 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_61 = paddle._C_ops.unsqueeze(reshape_316, full_int_array_8) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_62 = paddle._C_ops.unsqueeze(reshape_316, full_int_array_0) + del reshape_316 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_8 = paddle._C_ops.subtract(unsqueeze_61, unsqueeze_62) + del unsqueeze_61, unsqueeze_62 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_8 = paddle._C_ops.not_equal(subtract_8, full_35) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_16 = paddle._C_ops.where(not_equal_8, full_61, subtract_8) + del not_equal_8, subtract_8 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_8 = paddle._C_ops.equal(where_16, full_35) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_17 = paddle._C_ops.where(equal_8, full_62, where_16) + del equal_8, where_16 + + # pd_op.shape64: (3xi64) <- (-1x49x768xf32) + shape64_102 = paddle._C_ops.shape64(reshape_151) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_160 = paddle._C_ops.slice( + shape64_102, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_102 + + # pd_op.matmul: (-1x49x2304xf32) <- (-1x49x768xf32, 768x2304xf32) + matmul_87 = paddle._C_ops.matmul(reshape_151, parameter_88, False, False) + del parameter_88 + + # pd_op.add: (-1x49x2304xf32) <- (-1x49x2304xf32, 2304xf32) + add_119 = paddle._C_ops.add(matmul_87, parameter_87) + del parameter_87 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_143 = [slice_160, full_29, full_30, full_58, full_32] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_141 = paddle._C_ops.stack(combine_143, 0) + del combine_143 + + # pd_op.reshape: (-1x49x3x24x32xf32) <- (-1x49x2304xf32, 5xi64) + reshape_317 = paddle._C_ops.reshape(add_119, stack_141) + del stack_141 + + # pd_op.transpose: (3x-1x24x49x32xf32) <- (-1x49x3x24x32xf32) + transpose_104 = paddle._C_ops.transpose(reshape_317, [2, 0, 3, 1, 4]) + del reshape_317 + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_161 = paddle._C_ops.slice( + transpose_104, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_162 = paddle._C_ops.slice( + transpose_104, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_17 = paddle._C_ops.slice( + transpose_104, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x24x49x32xf32) <- (-1x24x49x32xf32, 1xf32) + scale_17 = paddle._C_ops.scale(slice_161, full_0, float("0"), True) + del slice_161 + + # pd_op.transpose: (-1x24x32x49xf32) <- (-1x24x49x32xf32) + transpose_105 = paddle._C_ops.transpose(slice_162, [0, 1, 3, 2]) + del slice_162 + + # pd_op.matmul: (-1x24x49x49xf32) <- (-1x24x49x32xf32, -1x24x32x49xf32) + matmul_88 = paddle._C_ops.matmul(scale_17, transpose_105, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_152 = paddle._C_ops.reshape(data_35, full_int_array_12) + del data_35 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_17 = paddle._C_ops.index_select(data_36, reshape_152, 0) + del data_36 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_318 = paddle._C_ops.reshape(index_select_17, full_int_array_13) + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_106 = paddle._C_ops.transpose(reshape_318, [2, 0, 1]) + del reshape_318 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_25 = paddle._C_ops.unsqueeze(transpose_106, full_int_array_7) + + # pd_op.add: (-1x24x49x49xf32) <- (-1x24x49x49xf32, 1x24x49x49xf32) + add_120 = paddle._C_ops.add(matmul_88, unsqueeze_25) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_8 = paddle._C_ops.floor_divide(slice_160, full_63) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_144 = [floor_divide_8, full_47, full_58, full_29, full_29] + del floor_divide_8 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_142 = paddle._C_ops.stack(combine_144, 0) + del combine_144 + + # pd_op.reshape: (-1x4x24x49x49xf32) <- (-1x24x49x49xf32, 5xi64) + reshape_153 = paddle._C_ops.reshape(add_120, stack_142) + del stack_142 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_63 = paddle._C_ops.unsqueeze(where_17, full_int_array_8) + del where_17 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_26 = paddle._C_ops.unsqueeze(unsqueeze_63, full_int_array_7) + del unsqueeze_63 + + # pd_op.add: (-1x4x24x49x49xf32) <- (-1x4x24x49x49xf32, 1x4x1x49x49xf32) + add_121 = paddle._C_ops.add(reshape_153, unsqueeze_26) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_145 = [slice_160, full_58, full_29, full_29] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_143 = paddle._C_ops.stack(combine_145, 0) + del combine_145 + + # pd_op.reshape: (-1x24x49x49xf32) <- (-1x4x24x49x49xf32, 4xi64) + reshape_319 = paddle._C_ops.reshape(add_121, stack_143) + del stack_143 + + # pd_op.softmax: (-1x24x49x49xf32) <- (-1x24x49x49xf32) + softmax_17 = paddle._C_ops.softmax(reshape_319, -1) + del reshape_319 + + # pd_op.matmul: (-1x24x49x32xf32) <- (-1x24x49x49xf32, -1x24x49x32xf32) + matmul_141 = paddle._C_ops.matmul(softmax_17, slice_17, False, False) + + # pd_op.transpose: (-1x49x24x32xf32) <- (-1x24x49x32xf32) + transpose_107 = paddle._C_ops.transpose(matmul_141, [0, 2, 1, 3]) + del matmul_141 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_146 = [slice_160, full_29, full_44] + del slice_160 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_144 = paddle._C_ops.stack(combine_146, 0) + del combine_146 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x49x24x32xf32, 3xi64) + reshape_154 = paddle._C_ops.reshape(transpose_107, stack_144) + del stack_144 + + # pd_op.matmul: (-1x49x768xf32) <- (-1x49x768xf32, 768x768xf32) + matmul_89 = paddle._C_ops.matmul(reshape_154, parameter_86, False, False) + del parameter_86 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, 768xf32) + add_122 = paddle._C_ops.add(matmul_89, parameter_85) + del parameter_85 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x49x768xf32, 4xi64) + reshape_155 = paddle._C_ops.reshape(add_122, full_int_array_38) + + # pd_op.reshape: (-1x2x2x7x7x768xf32) <- (-1x7x7x768xf32, 6xi64) + reshape_320 = paddle._C_ops.reshape(reshape_155, full_int_array_40) + + # pd_op.transpose: (-1x2x7x2x7x768xf32) <- (-1x2x2x7x7x768xf32) + transpose_108 = paddle._C_ops.transpose(reshape_320, [0, 1, 3, 2, 4, 5]) + del reshape_320 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x2x7x2x7x768xf32, 4xi64) + reshape_156 = paddle._C_ops.reshape(transpose_108, full_int_array_41) + + # pd_op.roll: (-1x14x14x768xf32) <- (-1x14x14x768xf32, 2xi64) + roll_17 = paddle._C_ops.roll(reshape_156, full_int_array_3, [1, 2]) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_147 = [slice_157, full_59, full_44] + del slice_157 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_145 = paddle._C_ops.stack(combine_147, 0) + del combine_147 + + # pd_op.reshape: (-1x196x768xf32) <- (-1x14x14x768xf32, 3xi64) + reshape_157 = paddle._C_ops.reshape(roll_17, stack_145) + del stack_145 + + # pd_op.full: (xf32) <- () + full_18 = paddle._C_ops.full( + [], + float("0.926087"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_195 = full_18 + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_103 = paddle._C_ops.shape64(reshape_157) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_163 = paddle._C_ops.slice( + shape64_103, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_103 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_148 = [slice_163, full_40, full_40] + del slice_163 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_146 = paddle._C_ops.stack(combine_148, 0) + del combine_148 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_32 = paddle._C_ops.uniform( + stack_146, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_146 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_211 = paddle._C_ops.add(full_18, uniform_32) + del uniform_32 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_32 = paddle._C_ops.floor(add_211) + del add_211 + + # pd_op.divide: (-1x196x768xf32) <- (-1x196x768xf32, xf32) + divide_32 = paddle._C_ops.divide(reshape_157, full_18) + + # pd_op.multiply: (-1x196x768xf32) <- (-1x196x768xf32, -1x1x1xf32) + multiply_32 = paddle._C_ops.multiply(divide_32, floor_32) + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_123 = paddle._C_ops.add(add_118, multiply_32) + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_114, layer_norm_115, layer_norm_116 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_123, parameter_84, parameter_83, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_83, parameter_84 + + # pd_op.matmul: (-1x196x3072xf32) <- (-1x196x768xf32, 768x3072xf32) + matmul_90 = paddle._C_ops.matmul(layer_norm_114, parameter_82, False, False) + del parameter_82 + + # pd_op.add: (-1x196x3072xf32) <- (-1x196x3072xf32, 3072xf32) + add_124 = paddle._C_ops.add(matmul_90, parameter_81) + del parameter_81 + + # pd_op.gelu: (-1x196x3072xf32) <- (-1x196x3072xf32) + gelu_17 = paddle._C_ops.gelu(add_124, False) + + # pd_op.matmul: (-1x196x768xf32) <- (-1x196x3072xf32, 3072x768xf32) + matmul_91 = paddle._C_ops.matmul(gelu_17, parameter_80, False, False) + del parameter_80 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, 768xf32) + add_125 = paddle._C_ops.add(matmul_91, parameter_79) + del parameter_79 + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_104 = paddle._C_ops.shape64(add_125) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_164 = paddle._C_ops.slice( + shape64_104, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_104 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_149 = [slice_164, full_40, full_40] + del slice_164 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_147 = paddle._C_ops.stack(combine_149, 0) + del combine_149 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_33 = paddle._C_ops.uniform( + stack_147, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_147 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_212 = paddle._C_ops.add(full_18, uniform_33) + del uniform_33 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_33 = paddle._C_ops.floor(add_212) + del add_212 + + # pd_op.divide: (-1x196x768xf32) <- (-1x196x768xf32, xf32) + divide_33 = paddle._C_ops.divide(add_125, full_18) + + # pd_op.multiply: (-1x196x768xf32) <- (-1x196x768xf32, -1x1x1xf32) + multiply_33 = paddle._C_ops.multiply(divide_33, floor_33) + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_126 = paddle._C_ops.add(add_123, multiply_33) + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_105 = paddle._C_ops.shape64(add_126) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_165 = paddle._C_ops.slice( + shape64_105, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_105 + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_117, layer_norm_118, layer_norm_119 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_126, parameter_78, parameter_77, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_77, parameter_78 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_150 = [slice_165, full_56, full_56, full_44] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_148 = paddle._C_ops.stack(combine_150, 0) + del combine_150 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x196x768xf32, 4xi64) + reshape_158 = paddle._C_ops.reshape(layer_norm_117, stack_148) + del stack_148 + + # pd_op.shape64: (4xi64) <- (-1x14x14x768xf32) + shape64_106 = paddle._C_ops.shape64(reshape_158) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_166 = paddle._C_ops.slice( + shape64_106, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_106 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_151 = [slice_166, full_57, full_28, full_57, full_28, full_44] + del slice_166 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_149 = paddle._C_ops.stack(combine_151, 0) + del combine_151 + + # pd_op.reshape: (-1x2x7x2x7x768xf32) <- (-1x14x14x768xf32, 6xi64) + reshape_321 = paddle._C_ops.reshape(reshape_158, stack_149) + del stack_149 + + # pd_op.transpose: (-1x2x2x7x7x768xf32) <- (-1x2x7x2x7x768xf32) + transpose_109 = paddle._C_ops.transpose(reshape_321, [0, 1, 3, 2, 4, 5]) + del reshape_321 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x2x2x7x7x768xf32, 4xi64) + reshape_159 = paddle._C_ops.reshape(transpose_109, full_int_array_38) + + # pd_op.reshape: (-1x49x768xf32) <- (-1x7x7x768xf32, 3xi64) + reshape_160 = paddle._C_ops.reshape(reshape_159, full_int_array_39) + + # pd_op.shape64: (3xi64) <- (-1x49x768xf32) + shape64_107 = paddle._C_ops.shape64(reshape_160) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_167 = paddle._C_ops.slice( + shape64_107, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_107 + + # pd_op.matmul: (-1x49x2304xf32) <- (-1x49x768xf32, 768x2304xf32) + matmul_92 = paddle._C_ops.matmul(reshape_160, parameter_76, False, False) + del parameter_76 + + # pd_op.add: (-1x49x2304xf32) <- (-1x49x2304xf32, 2304xf32) + add_127 = paddle._C_ops.add(matmul_92, parameter_75) + del parameter_75 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_152 = [slice_167, full_29, full_30, full_58, full_32] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_150 = paddle._C_ops.stack(combine_152, 0) + del combine_152 + + # pd_op.reshape: (-1x49x3x24x32xf32) <- (-1x49x2304xf32, 5xi64) + reshape_322 = paddle._C_ops.reshape(add_127, stack_150) + del stack_150 + + # pd_op.transpose: (3x-1x24x49x32xf32) <- (-1x49x3x24x32xf32) + transpose_110 = paddle._C_ops.transpose(reshape_322, [2, 0, 3, 1, 4]) + del reshape_322 + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_168 = paddle._C_ops.slice( + transpose_110, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_169 = paddle._C_ops.slice( + transpose_110, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_18 = paddle._C_ops.slice( + transpose_110, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x24x49x32xf32) <- (-1x24x49x32xf32, 1xf32) + scale_18 = paddle._C_ops.scale(slice_168, full_0, float("0"), True) + del slice_168 + + # pd_op.transpose: (-1x24x32x49xf32) <- (-1x24x49x32xf32) + transpose_111 = paddle._C_ops.transpose(slice_169, [0, 1, 3, 2]) + del slice_169 + + # pd_op.matmul: (-1x24x49x49xf32) <- (-1x24x49x32xf32, -1x24x32x49xf32) + matmul_93 = paddle._C_ops.matmul(scale_18, transpose_111, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_161 = paddle._C_ops.reshape(data_37, full_int_array_12) + del data_37 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_18 = paddle._C_ops.index_select(data_38, reshape_161, 0) + del data_38 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_323 = paddle._C_ops.reshape(index_select_18, full_int_array_13) + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_112 = paddle._C_ops.transpose(reshape_323, [2, 0, 1]) + del reshape_323 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_27 = paddle._C_ops.unsqueeze(transpose_112, full_int_array_7) + + # pd_op.add: (-1x24x49x49xf32) <- (-1x24x49x49xf32, 1x24x49x49xf32) + add_213 = paddle._C_ops.add(matmul_93, unsqueeze_27) + + # pd_op.softmax: (-1x24x49x49xf32) <- (-1x24x49x49xf32) + softmax_18 = paddle._C_ops.softmax(add_213, -1) + del add_213 + + # pd_op.matmul: (-1x24x49x32xf32) <- (-1x24x49x49xf32, -1x24x49x32xf32) + matmul_142 = paddle._C_ops.matmul(softmax_18, slice_18, False, False) + + # pd_op.transpose: (-1x49x24x32xf32) <- (-1x24x49x32xf32) + transpose_113 = paddle._C_ops.transpose(matmul_142, [0, 2, 1, 3]) + del matmul_142 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_153 = [slice_167, full_29, full_44] + del slice_167 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_151 = paddle._C_ops.stack(combine_153, 0) + del combine_153 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x49x24x32xf32, 3xi64) + reshape_162 = paddle._C_ops.reshape(transpose_113, stack_151) + del stack_151 + + # pd_op.matmul: (-1x49x768xf32) <- (-1x49x768xf32, 768x768xf32) + matmul_94 = paddle._C_ops.matmul(reshape_162, parameter_74, False, False) + del parameter_74 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, 768xf32) + add_128 = paddle._C_ops.add(matmul_94, parameter_73) + del parameter_73 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x49x768xf32, 4xi64) + reshape_163 = paddle._C_ops.reshape(add_128, full_int_array_38) + + # pd_op.reshape: (-1x2x2x7x7x768xf32) <- (-1x7x7x768xf32, 6xi64) + reshape_324 = paddle._C_ops.reshape(reshape_163, full_int_array_40) + + # pd_op.transpose: (-1x2x7x2x7x768xf32) <- (-1x2x2x7x7x768xf32) + transpose_114 = paddle._C_ops.transpose(reshape_324, [0, 1, 3, 2, 4, 5]) + del reshape_324 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x2x7x2x7x768xf32, 4xi64) + reshape_164 = paddle._C_ops.reshape(transpose_114, full_int_array_41) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_154 = [slice_165, full_59, full_44] + del slice_165 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_152 = paddle._C_ops.stack(combine_154, 0) + del combine_154 + + # pd_op.reshape: (-1x196x768xf32) <- (-1x14x14x768xf32, 3xi64) + reshape_165 = paddle._C_ops.reshape(reshape_164, stack_152) + del stack_152 + + # pd_op.full: (xf32) <- () + full_19 = paddle._C_ops.full( + [], + float("0.921739"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_204 = full_19 + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_108 = paddle._C_ops.shape64(reshape_165) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_170 = paddle._C_ops.slice( + shape64_108, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_108 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_155 = [slice_170, full_40, full_40] + del slice_170 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_153 = paddle._C_ops.stack(combine_155, 0) + del combine_155 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_34 = paddle._C_ops.uniform( + stack_153, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_153 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_214 = paddle._C_ops.add(full_19, uniform_34) + del uniform_34 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_34 = paddle._C_ops.floor(add_214) + del add_214 + + # pd_op.divide: (-1x196x768xf32) <- (-1x196x768xf32, xf32) + divide_34 = paddle._C_ops.divide(reshape_165, full_19) + + # pd_op.multiply: (-1x196x768xf32) <- (-1x196x768xf32, -1x1x1xf32) + multiply_34 = paddle._C_ops.multiply(divide_34, floor_34) + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_129 = paddle._C_ops.add(add_126, multiply_34) + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_120, layer_norm_121, layer_norm_122 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_129, parameter_72, parameter_71, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_71, parameter_72 + + # pd_op.matmul: (-1x196x3072xf32) <- (-1x196x768xf32, 768x3072xf32) + matmul_95 = paddle._C_ops.matmul(layer_norm_120, parameter_70, False, False) + del parameter_70 + + # pd_op.add: (-1x196x3072xf32) <- (-1x196x3072xf32, 3072xf32) + add_130 = paddle._C_ops.add(matmul_95, parameter_69) + del parameter_69 + + # pd_op.gelu: (-1x196x3072xf32) <- (-1x196x3072xf32) + gelu_18 = paddle._C_ops.gelu(add_130, False) + + # pd_op.matmul: (-1x196x768xf32) <- (-1x196x3072xf32, 3072x768xf32) + matmul_96 = paddle._C_ops.matmul(gelu_18, parameter_68, False, False) + del parameter_68 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, 768xf32) + add_131 = paddle._C_ops.add(matmul_96, parameter_67) + del parameter_67 + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_109 = paddle._C_ops.shape64(add_131) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_171 = paddle._C_ops.slice( + shape64_109, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_109 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_156 = [slice_171, full_40, full_40] + del slice_171 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_154 = paddle._C_ops.stack(combine_156, 0) + del combine_156 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_35 = paddle._C_ops.uniform( + stack_154, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_154 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_215 = paddle._C_ops.add(full_19, uniform_35) + del uniform_35 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_35 = paddle._C_ops.floor(add_215) + del add_215 + + # pd_op.divide: (-1x196x768xf32) <- (-1x196x768xf32, xf32) + divide_35 = paddle._C_ops.divide(add_131, full_19) + + # pd_op.multiply: (-1x196x768xf32) <- (-1x196x768xf32, -1x1x1xf32) + multiply_35 = paddle._C_ops.multiply(divide_35, floor_35) + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_132 = paddle._C_ops.add(add_129, multiply_35) + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_110 = paddle._C_ops.shape64(add_132) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_172 = paddle._C_ops.slice( + shape64_110, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_110 + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_123, layer_norm_124, layer_norm_125 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_132, parameter_66, parameter_65, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_65, parameter_66 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_157 = [slice_172, full_56, full_56, full_44] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_155 = paddle._C_ops.stack(combine_157, 0) + del combine_157 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x196x768xf32, 4xi64) + reshape_166 = paddle._C_ops.reshape(layer_norm_123, stack_155) + del stack_155 + + # pd_op.shape64: (4xi64) <- (-1x14x14x768xf32) + shape64_111 = paddle._C_ops.shape64(reshape_166) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_173 = paddle._C_ops.slice( + shape64_111, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_111 + + # pd_op.roll: (-1x14x14x768xf32) <- (-1x14x14x768xf32, 2xi64) + roll_18 = paddle._C_ops.roll(reshape_166, full_int_array_2, [1, 2]) + + # pd_op.shape64: (4xi64) <- (-1x14x14x768xf32) + shape64_112 = paddle._C_ops.shape64(roll_18) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_174 = paddle._C_ops.slice( + shape64_112, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_112 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_158 = [slice_174, full_57, full_28, full_57, full_28, full_44] + del slice_174 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_156 = paddle._C_ops.stack(combine_158, 0) + del combine_158 + + # pd_op.reshape: (-1x2x7x2x7x768xf32) <- (-1x14x14x768xf32, 6xi64) + reshape_325 = paddle._C_ops.reshape(roll_18, stack_156) + del stack_156 + + # pd_op.transpose: (-1x2x2x7x7x768xf32) <- (-1x2x7x2x7x768xf32) + transpose_115 = paddle._C_ops.transpose(reshape_325, [0, 1, 3, 2, 4, 5]) + del reshape_325 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x2x2x7x7x768xf32, 4xi64) + reshape_167 = paddle._C_ops.reshape(transpose_115, full_int_array_38) + + # pd_op.reshape: (-1x49x768xf32) <- (-1x7x7x768xf32, 3xi64) + reshape_168 = paddle._C_ops.reshape(reshape_167, full_int_array_39) + + # pd_op.full: (1x14x14x1xf32) <- () + full_70 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__84 = paddle._C_ops.set_value_( + full_70, + full_int_array_16, + full_int_array_17, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_70 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__85 = paddle._C_ops.set_value_( + set_value__84, + full_int_array_19, + full_int_array_20, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__84 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__86 = paddle._C_ops.set_value_( + set_value__85, + full_int_array_21, + full_int_array_22, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__85 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__87 = paddle._C_ops.set_value_( + set_value__86, + full_int_array_23, + full_int_array_24, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__86 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__88 = paddle._C_ops.set_value_( + set_value__87, + full_int_array_17, + full_int_array_2, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__87 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__89 = paddle._C_ops.set_value_( + set_value__88, + full_int_array_20, + full_int_array_25, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__88 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__90 = paddle._C_ops.set_value_( + set_value__89, + full_int_array_26, + full_int_array_27, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__89 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__91 = paddle._C_ops.set_value_( + set_value__90, + full_int_array_24, + full_int_array_28, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__90 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__9 = paddle._C_ops.set_value_( + set_value__91, + full_int_array_2, + full_int_array_29, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__91 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_326 = paddle._C_ops.reshape(set_value__9, full_int_array_42) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_155 = paddle._C_ops.transpose(reshape_326, [0, 1, 3, 2, 4, 5]) + del reshape_326 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_327 = paddle._C_ops.reshape(transpose_155, full_int_array_31) + del transpose_155 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_328 = paddle._C_ops.reshape(reshape_327, full_int_array_32) + del reshape_327 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_64 = paddle._C_ops.unsqueeze(reshape_328, full_int_array_8) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_65 = paddle._C_ops.unsqueeze(reshape_328, full_int_array_0) + del reshape_328 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_9 = paddle._C_ops.subtract(unsqueeze_64, unsqueeze_65) + del unsqueeze_64, unsqueeze_65 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_9 = paddle._C_ops.not_equal(subtract_9, full_35) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_18 = paddle._C_ops.where(not_equal_9, full_61, subtract_9) + del not_equal_9, subtract_9 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_9 = paddle._C_ops.equal(where_18, full_35) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_19 = paddle._C_ops.where(equal_9, full_62, where_18) + del equal_9, where_18 + + # pd_op.shape64: (3xi64) <- (-1x49x768xf32) + shape64_113 = paddle._C_ops.shape64(reshape_168) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_175 = paddle._C_ops.slice( + shape64_113, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_113 + + # pd_op.matmul: (-1x49x2304xf32) <- (-1x49x768xf32, 768x2304xf32) + matmul_97 = paddle._C_ops.matmul(reshape_168, parameter_64, False, False) + del parameter_64 + + # pd_op.add: (-1x49x2304xf32) <- (-1x49x2304xf32, 2304xf32) + add_133 = paddle._C_ops.add(matmul_97, parameter_63) + del parameter_63 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_159 = [slice_175, full_29, full_30, full_58, full_32] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_157 = paddle._C_ops.stack(combine_159, 0) + del combine_159 + + # pd_op.reshape: (-1x49x3x24x32xf32) <- (-1x49x2304xf32, 5xi64) + reshape_329 = paddle._C_ops.reshape(add_133, stack_157) + del stack_157 + + # pd_op.transpose: (3x-1x24x49x32xf32) <- (-1x49x3x24x32xf32) + transpose_116 = paddle._C_ops.transpose(reshape_329, [2, 0, 3, 1, 4]) + del reshape_329 + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_176 = paddle._C_ops.slice( + transpose_116, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_177 = paddle._C_ops.slice( + transpose_116, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_19 = paddle._C_ops.slice( + transpose_116, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x24x49x32xf32) <- (-1x24x49x32xf32, 1xf32) + scale_19 = paddle._C_ops.scale(slice_176, full_0, float("0"), True) + del slice_176 + + # pd_op.transpose: (-1x24x32x49xf32) <- (-1x24x49x32xf32) + transpose_117 = paddle._C_ops.transpose(slice_177, [0, 1, 3, 2]) + del slice_177 + + # pd_op.matmul: (-1x24x49x49xf32) <- (-1x24x49x32xf32, -1x24x32x49xf32) + matmul_98 = paddle._C_ops.matmul(scale_19, transpose_117, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_169 = paddle._C_ops.reshape(data_39, full_int_array_12) + del data_39 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_19 = paddle._C_ops.index_select(data_40, reshape_169, 0) + del data_40 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_330 = paddle._C_ops.reshape(index_select_19, full_int_array_13) + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_118 = paddle._C_ops.transpose(reshape_330, [2, 0, 1]) + del reshape_330 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_28 = paddle._C_ops.unsqueeze(transpose_118, full_int_array_7) + + # pd_op.add: (-1x24x49x49xf32) <- (-1x24x49x49xf32, 1x24x49x49xf32) + add_134 = paddle._C_ops.add(matmul_98, unsqueeze_28) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_9 = paddle._C_ops.floor_divide(slice_175, full_63) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_160 = [floor_divide_9, full_47, full_58, full_29, full_29] + del floor_divide_9 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_158 = paddle._C_ops.stack(combine_160, 0) + del combine_160 + + # pd_op.reshape: (-1x4x24x49x49xf32) <- (-1x24x49x49xf32, 5xi64) + reshape_170 = paddle._C_ops.reshape(add_134, stack_158) + del stack_158 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_66 = paddle._C_ops.unsqueeze(where_19, full_int_array_8) + del where_19 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_29 = paddle._C_ops.unsqueeze(unsqueeze_66, full_int_array_7) + del unsqueeze_66 + + # pd_op.add: (-1x4x24x49x49xf32) <- (-1x4x24x49x49xf32, 1x4x1x49x49xf32) + add_135 = paddle._C_ops.add(reshape_170, unsqueeze_29) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_161 = [slice_175, full_58, full_29, full_29] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_159 = paddle._C_ops.stack(combine_161, 0) + del combine_161 + + # pd_op.reshape: (-1x24x49x49xf32) <- (-1x4x24x49x49xf32, 4xi64) + reshape_331 = paddle._C_ops.reshape(add_135, stack_159) + del stack_159 + + # pd_op.softmax: (-1x24x49x49xf32) <- (-1x24x49x49xf32) + softmax_19 = paddle._C_ops.softmax(reshape_331, -1) + del reshape_331 + + # pd_op.matmul: (-1x24x49x32xf32) <- (-1x24x49x49xf32, -1x24x49x32xf32) + matmul_143 = paddle._C_ops.matmul(softmax_19, slice_19, False, False) + + # pd_op.transpose: (-1x49x24x32xf32) <- (-1x24x49x32xf32) + transpose_119 = paddle._C_ops.transpose(matmul_143, [0, 2, 1, 3]) + del matmul_143 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_162 = [slice_175, full_29, full_44] + del slice_175 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_160 = paddle._C_ops.stack(combine_162, 0) + del combine_162 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x49x24x32xf32, 3xi64) + reshape_171 = paddle._C_ops.reshape(transpose_119, stack_160) + del stack_160 + + # pd_op.matmul: (-1x49x768xf32) <- (-1x49x768xf32, 768x768xf32) + matmul_99 = paddle._C_ops.matmul(reshape_171, parameter_62, False, False) + del parameter_62 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, 768xf32) + add_136 = paddle._C_ops.add(matmul_99, parameter_61) + del parameter_61 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x49x768xf32, 4xi64) + reshape_172 = paddle._C_ops.reshape(add_136, full_int_array_38) + + # pd_op.reshape: (-1x2x2x7x7x768xf32) <- (-1x7x7x768xf32, 6xi64) + reshape_332 = paddle._C_ops.reshape(reshape_172, full_int_array_40) + + # pd_op.transpose: (-1x2x7x2x7x768xf32) <- (-1x2x2x7x7x768xf32) + transpose_120 = paddle._C_ops.transpose(reshape_332, [0, 1, 3, 2, 4, 5]) + del reshape_332 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x2x7x2x7x768xf32, 4xi64) + reshape_173 = paddle._C_ops.reshape(transpose_120, full_int_array_41) + + # pd_op.roll: (-1x14x14x768xf32) <- (-1x14x14x768xf32, 2xi64) + roll_19 = paddle._C_ops.roll(reshape_173, full_int_array_3, [1, 2]) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_163 = [slice_172, full_59, full_44] + del slice_172 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_161 = paddle._C_ops.stack(combine_163, 0) + del combine_163 + + # pd_op.reshape: (-1x196x768xf32) <- (-1x14x14x768xf32, 3xi64) + reshape_174 = paddle._C_ops.reshape(roll_19, stack_161) + del stack_161 + + # pd_op.full: (xf32) <- () + full_20 = paddle._C_ops.full( + [], + float("0.917391"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_215 = full_20 + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_114 = paddle._C_ops.shape64(reshape_174) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_178 = paddle._C_ops.slice( + shape64_114, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_114 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_164 = [slice_178, full_40, full_40] + del slice_178 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_162 = paddle._C_ops.stack(combine_164, 0) + del combine_164 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_36 = paddle._C_ops.uniform( + stack_162, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_162 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_216 = paddle._C_ops.add(full_20, uniform_36) + del uniform_36 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_36 = paddle._C_ops.floor(add_216) + del add_216 + + # pd_op.divide: (-1x196x768xf32) <- (-1x196x768xf32, xf32) + divide_36 = paddle._C_ops.divide(reshape_174, full_20) + + # pd_op.multiply: (-1x196x768xf32) <- (-1x196x768xf32, -1x1x1xf32) + multiply_36 = paddle._C_ops.multiply(divide_36, floor_36) + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_137 = paddle._C_ops.add(add_132, multiply_36) + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_126, layer_norm_127, layer_norm_128 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_137, parameter_60, parameter_59, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_59, parameter_60 + + # pd_op.matmul: (-1x196x3072xf32) <- (-1x196x768xf32, 768x3072xf32) + matmul_100 = paddle._C_ops.matmul(layer_norm_126, parameter_58, False, False) + del parameter_58 + + # pd_op.add: (-1x196x3072xf32) <- (-1x196x3072xf32, 3072xf32) + add_138 = paddle._C_ops.add(matmul_100, parameter_57) + del parameter_57 + + # pd_op.gelu: (-1x196x3072xf32) <- (-1x196x3072xf32) + gelu_19 = paddle._C_ops.gelu(add_138, False) + + # pd_op.matmul: (-1x196x768xf32) <- (-1x196x3072xf32, 3072x768xf32) + matmul_101 = paddle._C_ops.matmul(gelu_19, parameter_56, False, False) + del parameter_56 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, 768xf32) + add_139 = paddle._C_ops.add(matmul_101, parameter_55) + del parameter_55 + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_115 = paddle._C_ops.shape64(add_139) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_179 = paddle._C_ops.slice( + shape64_115, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_115 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_165 = [slice_179, full_40, full_40] + del slice_179 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_163 = paddle._C_ops.stack(combine_165, 0) + del combine_165 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_37 = paddle._C_ops.uniform( + stack_163, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_163 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_217 = paddle._C_ops.add(full_20, uniform_37) + del uniform_37 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_37 = paddle._C_ops.floor(add_217) + del add_217 + + # pd_op.divide: (-1x196x768xf32) <- (-1x196x768xf32, xf32) + divide_37 = paddle._C_ops.divide(add_139, full_20) + + # pd_op.multiply: (-1x196x768xf32) <- (-1x196x768xf32, -1x1x1xf32) + multiply_37 = paddle._C_ops.multiply(divide_37, floor_37) + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_140 = paddle._C_ops.add(add_137, multiply_37) + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_116 = paddle._C_ops.shape64(add_140) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_180 = paddle._C_ops.slice( + shape64_116, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_116 + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_129, layer_norm_130, layer_norm_131 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_140, parameter_54, parameter_53, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_53, parameter_54 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_166 = [slice_180, full_56, full_56, full_44] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_164 = paddle._C_ops.stack(combine_166, 0) + del combine_166 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x196x768xf32, 4xi64) + reshape_175 = paddle._C_ops.reshape(layer_norm_129, stack_164) + del stack_164 + + # pd_op.shape64: (4xi64) <- (-1x14x14x768xf32) + shape64_117 = paddle._C_ops.shape64(reshape_175) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_181 = paddle._C_ops.slice( + shape64_117, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_117 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_167 = [slice_181, full_57, full_28, full_57, full_28, full_44] + del slice_181 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_165 = paddle._C_ops.stack(combine_167, 0) + del combine_167 + + # pd_op.reshape: (-1x2x7x2x7x768xf32) <- (-1x14x14x768xf32, 6xi64) + reshape_333 = paddle._C_ops.reshape(reshape_175, stack_165) + del stack_165 + + # pd_op.transpose: (-1x2x2x7x7x768xf32) <- (-1x2x7x2x7x768xf32) + transpose_121 = paddle._C_ops.transpose(reshape_333, [0, 1, 3, 2, 4, 5]) + del reshape_333 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x2x2x7x7x768xf32, 4xi64) + reshape_176 = paddle._C_ops.reshape(transpose_121, full_int_array_38) + + # pd_op.reshape: (-1x49x768xf32) <- (-1x7x7x768xf32, 3xi64) + reshape_177 = paddle._C_ops.reshape(reshape_176, full_int_array_39) + + # pd_op.shape64: (3xi64) <- (-1x49x768xf32) + shape64_118 = paddle._C_ops.shape64(reshape_177) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_182 = paddle._C_ops.slice( + shape64_118, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_118 + + # pd_op.matmul: (-1x49x2304xf32) <- (-1x49x768xf32, 768x2304xf32) + matmul_102 = paddle._C_ops.matmul(reshape_177, parameter_52, False, False) + del parameter_52 + + # pd_op.add: (-1x49x2304xf32) <- (-1x49x2304xf32, 2304xf32) + add_141 = paddle._C_ops.add(matmul_102, parameter_51) + del parameter_51 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_168 = [slice_182, full_29, full_30, full_58, full_32] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_166 = paddle._C_ops.stack(combine_168, 0) + del combine_168 + + # pd_op.reshape: (-1x49x3x24x32xf32) <- (-1x49x2304xf32, 5xi64) + reshape_334 = paddle._C_ops.reshape(add_141, stack_166) + del stack_166 + + # pd_op.transpose: (3x-1x24x49x32xf32) <- (-1x49x3x24x32xf32) + transpose_122 = paddle._C_ops.transpose(reshape_334, [2, 0, 3, 1, 4]) + del reshape_334 + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_183 = paddle._C_ops.slice( + transpose_122, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_184 = paddle._C_ops.slice( + transpose_122, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_20 = paddle._C_ops.slice( + transpose_122, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x24x49x32xf32) <- (-1x24x49x32xf32, 1xf32) + scale_20 = paddle._C_ops.scale(slice_183, full_0, float("0"), True) + del slice_183 + + # pd_op.transpose: (-1x24x32x49xf32) <- (-1x24x49x32xf32) + transpose_123 = paddle._C_ops.transpose(slice_184, [0, 1, 3, 2]) + del slice_184 + + # pd_op.matmul: (-1x24x49x49xf32) <- (-1x24x49x32xf32, -1x24x32x49xf32) + matmul_103 = paddle._C_ops.matmul(scale_20, transpose_123, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_178 = paddle._C_ops.reshape(data_41, full_int_array_12) + del data_41 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_20 = paddle._C_ops.index_select(data_42, reshape_178, 0) + del data_42 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_335 = paddle._C_ops.reshape(index_select_20, full_int_array_13) + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_124 = paddle._C_ops.transpose(reshape_335, [2, 0, 1]) + del reshape_335 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_30 = paddle._C_ops.unsqueeze(transpose_124, full_int_array_7) + + # pd_op.add: (-1x24x49x49xf32) <- (-1x24x49x49xf32, 1x24x49x49xf32) + add_218 = paddle._C_ops.add(matmul_103, unsqueeze_30) + + # pd_op.softmax: (-1x24x49x49xf32) <- (-1x24x49x49xf32) + softmax_20 = paddle._C_ops.softmax(add_218, -1) + del add_218 + + # pd_op.matmul: (-1x24x49x32xf32) <- (-1x24x49x49xf32, -1x24x49x32xf32) + matmul_144 = paddle._C_ops.matmul(softmax_20, slice_20, False, False) + + # pd_op.transpose: (-1x49x24x32xf32) <- (-1x24x49x32xf32) + transpose_125 = paddle._C_ops.transpose(matmul_144, [0, 2, 1, 3]) + del matmul_144 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_169 = [slice_182, full_29, full_44] + del slice_182 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_167 = paddle._C_ops.stack(combine_169, 0) + del combine_169 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x49x24x32xf32, 3xi64) + reshape_179 = paddle._C_ops.reshape(transpose_125, stack_167) + del stack_167 + + # pd_op.matmul: (-1x49x768xf32) <- (-1x49x768xf32, 768x768xf32) + matmul_104 = paddle._C_ops.matmul(reshape_179, parameter_50, False, False) + del parameter_50 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, 768xf32) + add_142 = paddle._C_ops.add(matmul_104, parameter_49) + del parameter_49 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x49x768xf32, 4xi64) + reshape_180 = paddle._C_ops.reshape(add_142, full_int_array_38) + + # pd_op.reshape: (-1x2x2x7x7x768xf32) <- (-1x7x7x768xf32, 6xi64) + reshape_336 = paddle._C_ops.reshape(reshape_180, full_int_array_40) + + # pd_op.transpose: (-1x2x7x2x7x768xf32) <- (-1x2x2x7x7x768xf32) + transpose_126 = paddle._C_ops.transpose(reshape_336, [0, 1, 3, 2, 4, 5]) + del reshape_336 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x2x7x2x7x768xf32, 4xi64) + reshape_181 = paddle._C_ops.reshape(transpose_126, full_int_array_41) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_170 = [slice_180, full_59, full_44] + del slice_180 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_168 = paddle._C_ops.stack(combine_170, 0) + del combine_170 + + # pd_op.reshape: (-1x196x768xf32) <- (-1x14x14x768xf32, 3xi64) + reshape_182 = paddle._C_ops.reshape(reshape_181, stack_168) + del stack_168 + + # pd_op.full: (xf32) <- () + full_21 = paddle._C_ops.full( + [], + float("0.913043"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_224 = full_21 + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_119 = paddle._C_ops.shape64(reshape_182) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_185 = paddle._C_ops.slice( + shape64_119, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_119 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_171 = [slice_185, full_40, full_40] + del slice_185 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_169 = paddle._C_ops.stack(combine_171, 0) + del combine_171 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_38 = paddle._C_ops.uniform( + stack_169, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_169 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_219 = paddle._C_ops.add(full_21, uniform_38) + del uniform_38 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_38 = paddle._C_ops.floor(add_219) + del add_219 + + # pd_op.divide: (-1x196x768xf32) <- (-1x196x768xf32, xf32) + divide_38 = paddle._C_ops.divide(reshape_182, full_21) + + # pd_op.multiply: (-1x196x768xf32) <- (-1x196x768xf32, -1x1x1xf32) + multiply_38 = paddle._C_ops.multiply(divide_38, floor_38) + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_143 = paddle._C_ops.add(add_140, multiply_38) + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_132, layer_norm_133, layer_norm_134 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_143, parameter_48, parameter_47, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_47, parameter_48 + + # pd_op.matmul: (-1x196x3072xf32) <- (-1x196x768xf32, 768x3072xf32) + matmul_105 = paddle._C_ops.matmul(layer_norm_132, parameter_46, False, False) + del parameter_46 + + # pd_op.add: (-1x196x3072xf32) <- (-1x196x3072xf32, 3072xf32) + add_144 = paddle._C_ops.add(matmul_105, parameter_45) + del parameter_45 + + # pd_op.gelu: (-1x196x3072xf32) <- (-1x196x3072xf32) + gelu_20 = paddle._C_ops.gelu(add_144, False) + + # pd_op.matmul: (-1x196x768xf32) <- (-1x196x3072xf32, 3072x768xf32) + matmul_106 = paddle._C_ops.matmul(gelu_20, parameter_44, False, False) + del parameter_44 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, 768xf32) + add_145 = paddle._C_ops.add(matmul_106, parameter_43) + del parameter_43 + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_120 = paddle._C_ops.shape64(add_145) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_186 = paddle._C_ops.slice( + shape64_120, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_120 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_172 = [slice_186, full_40, full_40] + del slice_186 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_170 = paddle._C_ops.stack(combine_172, 0) + del combine_172 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_39 = paddle._C_ops.uniform( + stack_170, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_170 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_220 = paddle._C_ops.add(full_21, uniform_39) + del uniform_39 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_39 = paddle._C_ops.floor(add_220) + del add_220 + + # pd_op.divide: (-1x196x768xf32) <- (-1x196x768xf32, xf32) + divide_39 = paddle._C_ops.divide(add_145, full_21) + + # pd_op.multiply: (-1x196x768xf32) <- (-1x196x768xf32, -1x1x1xf32) + multiply_39 = paddle._C_ops.multiply(divide_39, floor_39) + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_146 = paddle._C_ops.add(add_143, multiply_39) + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_121 = paddle._C_ops.shape64(add_146) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_187 = paddle._C_ops.slice( + shape64_121, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_121 + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_135, layer_norm_136, layer_norm_137 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_146, parameter_42, parameter_41, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_41, parameter_42 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_173 = [slice_187, full_56, full_56, full_44] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_171 = paddle._C_ops.stack(combine_173, 0) + del combine_173 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x196x768xf32, 4xi64) + reshape_183 = paddle._C_ops.reshape(layer_norm_135, stack_171) + del stack_171 + + # pd_op.shape64: (4xi64) <- (-1x14x14x768xf32) + shape64_122 = paddle._C_ops.shape64(reshape_183) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_188 = paddle._C_ops.slice( + shape64_122, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_122 + + # pd_op.roll: (-1x14x14x768xf32) <- (-1x14x14x768xf32, 2xi64) + roll_20 = paddle._C_ops.roll(reshape_183, full_int_array_2, [1, 2]) + + # pd_op.shape64: (4xi64) <- (-1x14x14x768xf32) + shape64_123 = paddle._C_ops.shape64(roll_20) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_189 = paddle._C_ops.slice( + shape64_123, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_123 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_174 = [slice_189, full_57, full_28, full_57, full_28, full_44] + del full_57, slice_189 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_172 = paddle._C_ops.stack(combine_174, 0) + del combine_174 + + # pd_op.reshape: (-1x2x7x2x7x768xf32) <- (-1x14x14x768xf32, 6xi64) + reshape_337 = paddle._C_ops.reshape(roll_20, stack_172) + del stack_172 + + # pd_op.transpose: (-1x2x2x7x7x768xf32) <- (-1x2x7x2x7x768xf32) + transpose_127 = paddle._C_ops.transpose(reshape_337, [0, 1, 3, 2, 4, 5]) + del reshape_337 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x2x2x7x7x768xf32, 4xi64) + reshape_184 = paddle._C_ops.reshape(transpose_127, full_int_array_38) + + # pd_op.reshape: (-1x49x768xf32) <- (-1x7x7x768xf32, 3xi64) + reshape_185 = paddle._C_ops.reshape(reshape_184, full_int_array_39) + del full_int_array_39 + + # pd_op.full: (1x14x14x1xf32) <- () + full_71 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__92 = paddle._C_ops.set_value_( + full_71, + full_int_array_16, + full_int_array_17, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_71 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__93 = paddle._C_ops.set_value_( + set_value__92, + full_int_array_19, + full_int_array_20, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__92 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__94 = paddle._C_ops.set_value_( + set_value__93, + full_int_array_21, + full_int_array_22, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__93 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__95 = paddle._C_ops.set_value_( + set_value__94, + full_int_array_23, + full_int_array_24, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__94 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__96 = paddle._C_ops.set_value_( + set_value__95, + full_int_array_17, + full_int_array_2, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__95 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__97 = paddle._C_ops.set_value_( + set_value__96, + full_int_array_20, + full_int_array_25, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__96 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__98 = paddle._C_ops.set_value_( + set_value__97, + full_int_array_26, + full_int_array_27, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__97 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__99 = paddle._C_ops.set_value_( + set_value__98, + full_int_array_24, + full_int_array_28, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__98 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__10 = paddle._C_ops.set_value_( + set_value__99, + full_int_array_2, + full_int_array_29, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__99 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_338 = paddle._C_ops.reshape(set_value__10, full_int_array_42) + del full_int_array_42 + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_156 = paddle._C_ops.transpose(reshape_338, [0, 1, 3, 2, 4, 5]) + del reshape_338 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_339 = paddle._C_ops.reshape(transpose_156, full_int_array_31) + del transpose_156 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_340 = paddle._C_ops.reshape(reshape_339, full_int_array_32) + del reshape_339 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_67 = paddle._C_ops.unsqueeze(reshape_340, full_int_array_8) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_68 = paddle._C_ops.unsqueeze(reshape_340, full_int_array_0) + del reshape_340 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_10 = paddle._C_ops.subtract(unsqueeze_67, unsqueeze_68) + del unsqueeze_67, unsqueeze_68 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_10 = paddle._C_ops.not_equal(subtract_10, full_35) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_20 = paddle._C_ops.where(not_equal_10, full_61, subtract_10) + del full_61, not_equal_10, subtract_10 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_10 = paddle._C_ops.equal(where_20, full_35) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_21 = paddle._C_ops.where(equal_10, full_62, where_20) + del equal_10, full_62, where_20 + + # pd_op.shape64: (3xi64) <- (-1x49x768xf32) + shape64_124 = paddle._C_ops.shape64(reshape_185) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_190 = paddle._C_ops.slice( + shape64_124, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_124 + + # pd_op.matmul: (-1x49x2304xf32) <- (-1x49x768xf32, 768x2304xf32) + matmul_107 = paddle._C_ops.matmul(reshape_185, parameter_40, False, False) + del parameter_40 + + # pd_op.add: (-1x49x2304xf32) <- (-1x49x2304xf32, 2304xf32) + add_147 = paddle._C_ops.add(matmul_107, parameter_39) + del parameter_39 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_175 = [slice_190, full_29, full_30, full_58, full_32] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_173 = paddle._C_ops.stack(combine_175, 0) + del combine_175 + + # pd_op.reshape: (-1x49x3x24x32xf32) <- (-1x49x2304xf32, 5xi64) + reshape_341 = paddle._C_ops.reshape(add_147, stack_173) + del stack_173 + + # pd_op.transpose: (3x-1x24x49x32xf32) <- (-1x49x3x24x32xf32) + transpose_128 = paddle._C_ops.transpose(reshape_341, [2, 0, 3, 1, 4]) + del reshape_341 + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_191 = paddle._C_ops.slice( + transpose_128, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_192 = paddle._C_ops.slice( + transpose_128, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_21 = paddle._C_ops.slice( + transpose_128, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x24x49x32xf32) <- (-1x24x49x32xf32, 1xf32) + scale_21 = paddle._C_ops.scale(slice_191, full_0, float("0"), True) + del slice_191 + + # pd_op.transpose: (-1x24x32x49xf32) <- (-1x24x49x32xf32) + transpose_129 = paddle._C_ops.transpose(slice_192, [0, 1, 3, 2]) + del slice_192 + + # pd_op.matmul: (-1x24x49x49xf32) <- (-1x24x49x32xf32, -1x24x32x49xf32) + matmul_108 = paddle._C_ops.matmul(scale_21, transpose_129, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_186 = paddle._C_ops.reshape(data_43, full_int_array_12) + del data_43 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_21 = paddle._C_ops.index_select(data_44, reshape_186, 0) + del data_44 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_342 = paddle._C_ops.reshape(index_select_21, full_int_array_13) + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_130 = paddle._C_ops.transpose(reshape_342, [2, 0, 1]) + del reshape_342 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_31 = paddle._C_ops.unsqueeze(transpose_130, full_int_array_7) + + # pd_op.add: (-1x24x49x49xf32) <- (-1x24x49x49xf32, 1x24x49x49xf32) + add_148 = paddle._C_ops.add(matmul_108, unsqueeze_31) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_10 = paddle._C_ops.floor_divide(slice_190, full_63) + del full_63 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_176 = [floor_divide_10, full_47, full_58, full_29, full_29] + del floor_divide_10, full_47 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_174 = paddle._C_ops.stack(combine_176, 0) + del combine_176 + + # pd_op.reshape: (-1x4x24x49x49xf32) <- (-1x24x49x49xf32, 5xi64) + reshape_187 = paddle._C_ops.reshape(add_148, stack_174) + del stack_174 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_69 = paddle._C_ops.unsqueeze(where_21, full_int_array_8) + del where_21 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_32 = paddle._C_ops.unsqueeze(unsqueeze_69, full_int_array_7) + del unsqueeze_69 + + # pd_op.add: (-1x4x24x49x49xf32) <- (-1x4x24x49x49xf32, 1x4x1x49x49xf32) + add_149 = paddle._C_ops.add(reshape_187, unsqueeze_32) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_177 = [slice_190, full_58, full_29, full_29] + del full_58 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_175 = paddle._C_ops.stack(combine_177, 0) + del combine_177 + + # pd_op.reshape: (-1x24x49x49xf32) <- (-1x4x24x49x49xf32, 4xi64) + reshape_343 = paddle._C_ops.reshape(add_149, stack_175) + del stack_175 + + # pd_op.softmax: (-1x24x49x49xf32) <- (-1x24x49x49xf32) + softmax_21 = paddle._C_ops.softmax(reshape_343, -1) + del reshape_343 + + # pd_op.matmul: (-1x24x49x32xf32) <- (-1x24x49x49xf32, -1x24x49x32xf32) + matmul_145 = paddle._C_ops.matmul(softmax_21, slice_21, False, False) + + # pd_op.transpose: (-1x49x24x32xf32) <- (-1x24x49x32xf32) + transpose_131 = paddle._C_ops.transpose(matmul_145, [0, 2, 1, 3]) + del matmul_145 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_178 = [slice_190, full_29, full_44] + del slice_190 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_176 = paddle._C_ops.stack(combine_178, 0) + del combine_178 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x49x24x32xf32, 3xi64) + reshape_188 = paddle._C_ops.reshape(transpose_131, stack_176) + del stack_176 + + # pd_op.matmul: (-1x49x768xf32) <- (-1x49x768xf32, 768x768xf32) + matmul_109 = paddle._C_ops.matmul(reshape_188, parameter_38, False, False) + del parameter_38 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, 768xf32) + add_150 = paddle._C_ops.add(matmul_109, parameter_37) + del parameter_37 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x49x768xf32, 4xi64) + reshape_189 = paddle._C_ops.reshape(add_150, full_int_array_38) + del full_int_array_38 + + # pd_op.reshape: (-1x2x2x7x7x768xf32) <- (-1x7x7x768xf32, 6xi64) + reshape_344 = paddle._C_ops.reshape(reshape_189, full_int_array_40) + del full_int_array_40 + + # pd_op.transpose: (-1x2x7x2x7x768xf32) <- (-1x2x2x7x7x768xf32) + transpose_132 = paddle._C_ops.transpose(reshape_344, [0, 1, 3, 2, 4, 5]) + del reshape_344 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x2x7x2x7x768xf32, 4xi64) + reshape_190 = paddle._C_ops.reshape(transpose_132, full_int_array_41) + del full_int_array_41 + + # pd_op.roll: (-1x14x14x768xf32) <- (-1x14x14x768xf32, 2xi64) + roll_21 = paddle._C_ops.roll(reshape_190, full_int_array_3, [1, 2]) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_179 = [slice_187, full_59, full_44] + del full_59, slice_187 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_177 = paddle._C_ops.stack(combine_179, 0) + del combine_179 + + # pd_op.reshape: (-1x196x768xf32) <- (-1x14x14x768xf32, 3xi64) + reshape_191 = paddle._C_ops.reshape(roll_21, stack_177) + del stack_177 + + # pd_op.full: (xf32) <- () + full_22 = paddle._C_ops.full( + [], + float("0.908696"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_235 = full_22 + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_125 = paddle._C_ops.shape64(reshape_191) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_193 = paddle._C_ops.slice( + shape64_125, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_125 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_180 = [slice_193, full_40, full_40] + del slice_193 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_178 = paddle._C_ops.stack(combine_180, 0) + del combine_180 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_40 = paddle._C_ops.uniform( + stack_178, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_178 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_221 = paddle._C_ops.add(full_22, uniform_40) + del uniform_40 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_40 = paddle._C_ops.floor(add_221) + del add_221 + + # pd_op.divide: (-1x196x768xf32) <- (-1x196x768xf32, xf32) + divide_40 = paddle._C_ops.divide(reshape_191, full_22) + + # pd_op.multiply: (-1x196x768xf32) <- (-1x196x768xf32, -1x1x1xf32) + multiply_40 = paddle._C_ops.multiply(divide_40, floor_40) + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_151 = paddle._C_ops.add(add_146, multiply_40) + + # pd_op.layer_norm: (-1x196x768xf32, -1x196xf32, -1x196xf32) <- (-1x196x768xf32, 768xf32, 768xf32) + layer_norm_138, layer_norm_139, layer_norm_140 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_151, parameter_36, parameter_35, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_35, parameter_36 + + # pd_op.matmul: (-1x196x3072xf32) <- (-1x196x768xf32, 768x3072xf32) + matmul_110 = paddle._C_ops.matmul(layer_norm_138, parameter_34, False, False) + del parameter_34 + + # pd_op.add: (-1x196x3072xf32) <- (-1x196x3072xf32, 3072xf32) + add_152 = paddle._C_ops.add(matmul_110, parameter_33) + del parameter_33 + + # pd_op.gelu: (-1x196x3072xf32) <- (-1x196x3072xf32) + gelu_21 = paddle._C_ops.gelu(add_152, False) + + # pd_op.matmul: (-1x196x768xf32) <- (-1x196x3072xf32, 3072x768xf32) + matmul_111 = paddle._C_ops.matmul(gelu_21, parameter_32, False, False) + del parameter_32 + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, 768xf32) + add_153 = paddle._C_ops.add(matmul_111, parameter_31) + del parameter_31 + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_126 = paddle._C_ops.shape64(add_153) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_194 = paddle._C_ops.slice( + shape64_126, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_126 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_181 = [slice_194, full_40, full_40] + del slice_194 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_179 = paddle._C_ops.stack(combine_181, 0) + del combine_181 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_41 = paddle._C_ops.uniform( + stack_179, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_179 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_222 = paddle._C_ops.add(full_22, uniform_41) + del uniform_41 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_41 = paddle._C_ops.floor(add_222) + del add_222 + + # pd_op.divide: (-1x196x768xf32) <- (-1x196x768xf32, xf32) + divide_41 = paddle._C_ops.divide(add_153, full_22) + + # pd_op.multiply: (-1x196x768xf32) <- (-1x196x768xf32, -1x1x1xf32) + multiply_41 = paddle._C_ops.multiply(divide_41, floor_41) + + # pd_op.add: (-1x196x768xf32) <- (-1x196x768xf32, -1x196x768xf32) + add_154 = paddle._C_ops.add(add_151, multiply_41) + + # pd_op.shape64: (3xi64) <- (-1x196x768xf32) + shape64_127 = paddle._C_ops.shape64(add_154) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_195 = paddle._C_ops.slice( + shape64_127, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_127 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_182 = [slice_195, full_56, full_56, full_44] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_180 = paddle._C_ops.stack(combine_182, 0) + del combine_182 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x196x768xf32, 4xi64) + reshape_192 = paddle._C_ops.reshape(add_154, stack_180) + del stack_180 + + # pd_op.strided_slice: (-1x7x7x768xf32) <- (-1x14x14x768xf32, 2xi64, 2xi64, 2xi64) + strided_slice_8 = paddle._C_ops.strided_slice( + reshape_192, [1, 2], full_int_array_16, full_int_array_29, full_int_array_4 + ) + + # pd_op.strided_slice: (-1x7x7x768xf32) <- (-1x14x14x768xf32, 2xi64, 2xi64, 2xi64) + strided_slice_9 = paddle._C_ops.strided_slice( + reshape_192, [1, 2], full_int_array_5, full_int_array_29, full_int_array_4 + ) + + # pd_op.strided_slice: (-1x7x7x768xf32) <- (-1x14x14x768xf32, 2xi64, 2xi64, 2xi64) + strided_slice_10 = paddle._C_ops.strided_slice( + reshape_192, [1, 2], full_int_array_6, full_int_array_29, full_int_array_4 + ) + + # pd_op.strided_slice: (-1x7x7x768xf32) <- (-1x14x14x768xf32, 2xi64, 2xi64, 2xi64) + strided_slice_11 = paddle._C_ops.strided_slice( + reshape_192, [1, 2], full_int_array_18, full_int_array_29, full_int_array_4 + ) + + # pd_op.shape64: (4xi64) <- (-1x14x14x768xf32) + shape64_128 = paddle._C_ops.shape64(reshape_192) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_196 = paddle._C_ops.slice( + shape64_128, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_128 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_183 = [slice_196, full_56, full_56, full_44] + del full_44, full_56, slice_196 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_181 = paddle._C_ops.stack(combine_183, 0) + del combine_183 + + # pd_op.reshape: (-1x14x14x768xf32) <- (-1x14x14x768xf32, 4xi64) + reshape_345 = paddle._C_ops.reshape(reshape_192, stack_181) + del stack_181 + + # builtin.combine: ([-1x7x7x768xf32, -1x7x7x768xf32, -1x7x7x768xf32, -1x7x7x768xf32]) <- (-1x7x7x768xf32, -1x7x7x768xf32, -1x7x7x768xf32, -1x7x7x768xf32) + combine_184 = [ + strided_slice_8, + strided_slice_9, + strided_slice_10, + strided_slice_11, + ] + + # pd_op.concat: (-1x7x7x3072xf32) <- ([-1x7x7x768xf32, -1x7x7x768xf32, -1x7x7x768xf32, -1x7x7x768xf32], 1xi32) + concat_2 = paddle._C_ops.concat(combine_184, full_2) + del combine_184 + + # pd_op.full: (xi64) <- () + full_72 = paddle._C_ops.full( + [], float("3072"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_185 = [slice_195, full_43, full_72] + del full_43, full_72, slice_195 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_182 = paddle._C_ops.stack(combine_185, 0) + del combine_185 + + # pd_op.reshape: (-1x-1x3072xf32) <- (-1x7x7x3072xf32, 3xi64) + reshape_193 = paddle._C_ops.reshape(concat_2, stack_182) + del stack_182 + + # pd_op.layer_norm: (-1x-1x3072xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x3072xf32, 3072xf32, 3072xf32) + layer_norm_141, layer_norm_142, layer_norm_143 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + reshape_193, parameter_30, parameter_29, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_29, parameter_30 + + # pd_op.matmul: (-1x-1x1536xf32) <- (-1x-1x3072xf32, 3072x1536xf32) + matmul_112 = paddle._C_ops.matmul(layer_norm_141, parameter_28, False, False) + del parameter_28 + + # pd_op.shape64: (3xi64) <- (-1x-1x1536xf32) + shape64_129 = paddle._C_ops.shape64(matmul_112) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_197 = paddle._C_ops.slice( + shape64_129, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_129 + + # pd_op.shape64: (3xi64) <- (-1x-1x1536xf32) + shape64_130 = paddle._C_ops.shape64(matmul_112) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_198 = paddle._C_ops.slice( + shape64_130, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + del shape64_130 + + # pd_op.layer_norm: (-1x-1x1536xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x1536xf32, 1536xf32, 1536xf32) + layer_norm_144, layer_norm_145, layer_norm_146 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + matmul_112, parameter_27, parameter_26, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_26, parameter_27 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_186 = [slice_197, full_28, full_28, full_55] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_183 = paddle._C_ops.stack(combine_186, 0) + del combine_186 + + # pd_op.reshape: (-1x7x7x1536xf32) <- (-1x-1x1536xf32, 4xi64) + reshape_194 = paddle._C_ops.reshape(layer_norm_144, stack_183) + del stack_183 + + # pd_op.shape64: (4xi64) <- (-1x7x7x1536xf32) + shape64_131 = paddle._C_ops.shape64(reshape_194) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_199 = paddle._C_ops.slice( + shape64_131, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_131 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_187 = [slice_199, full_40, full_28, full_40, full_28, full_55] + del slice_199 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_184 = paddle._C_ops.stack(combine_187, 0) + del combine_187 + + # pd_op.reshape: (-1x1x7x1x7x1536xf32) <- (-1x7x7x1536xf32, 6xi64) + reshape_346 = paddle._C_ops.reshape(reshape_194, stack_184) + del stack_184 + + # pd_op.transpose: (-1x1x1x7x7x1536xf32) <- (-1x1x7x1x7x1536xf32) + transpose_133 = paddle._C_ops.transpose(reshape_346, [0, 1, 3, 2, 4, 5]) + del reshape_346 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_43 = [-1, 7, 7, 1536] + + # pd_op.reshape: (-1x7x7x1536xf32) <- (-1x1x1x7x7x1536xf32, 4xi64) + reshape_195 = paddle._C_ops.reshape(transpose_133, full_int_array_43) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_44 = [-1, 49, 1536] + + # pd_op.reshape: (-1x49x1536xf32) <- (-1x7x7x1536xf32, 3xi64) + reshape_196 = paddle._C_ops.reshape(reshape_195, full_int_array_44) + + # pd_op.shape64: (3xi64) <- (-1x49x1536xf32) + shape64_132 = paddle._C_ops.shape64(reshape_196) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_200 = paddle._C_ops.slice( + shape64_132, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_132 + + # pd_op.matmul: (-1x49x4608xf32) <- (-1x49x1536xf32, 1536x4608xf32) + matmul_113 = paddle._C_ops.matmul(reshape_196, parameter_25, False, False) + del parameter_25 + + # pd_op.add: (-1x49x4608xf32) <- (-1x49x4608xf32, 4608xf32) + add_155 = paddle._C_ops.add(matmul_113, parameter_24) + del parameter_24 + + # pd_op.full: (xi64) <- () + full_73 = paddle._C_ops.full( + [], float("48"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_188 = [slice_200, full_29, full_30, full_73, full_32] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_185 = paddle._C_ops.stack(combine_188, 0) + del combine_188 + + # pd_op.reshape: (-1x49x3x48x32xf32) <- (-1x49x4608xf32, 5xi64) + reshape_347 = paddle._C_ops.reshape(add_155, stack_185) + del stack_185 + + # pd_op.transpose: (3x-1x48x49x32xf32) <- (-1x49x3x48x32xf32) + transpose_134 = paddle._C_ops.transpose(reshape_347, [2, 0, 3, 1, 4]) + del reshape_347 + + # pd_op.slice: (-1x48x49x32xf32) <- (3x-1x48x49x32xf32, 1xi64, 1xi64) + slice_201 = paddle._C_ops.slice( + transpose_134, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x48x49x32xf32) <- (3x-1x48x49x32xf32, 1xi64, 1xi64) + slice_202 = paddle._C_ops.slice( + transpose_134, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x48x49x32xf32) <- (3x-1x48x49x32xf32, 1xi64, 1xi64) + slice_22 = paddle._C_ops.slice( + transpose_134, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x48x49x32xf32) <- (-1x48x49x32xf32, 1xf32) + scale_22 = paddle._C_ops.scale(slice_201, full_0, float("0"), True) + del slice_201 + + # pd_op.transpose: (-1x48x32x49xf32) <- (-1x48x49x32xf32) + transpose_135 = paddle._C_ops.transpose(slice_202, [0, 1, 3, 2]) + del slice_202 + + # pd_op.matmul: (-1x48x49x49xf32) <- (-1x48x49x32xf32, -1x48x32x49xf32) + matmul_114 = paddle._C_ops.matmul(scale_22, transpose_135, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_197 = paddle._C_ops.reshape(data_45, full_int_array_12) + del data_45 + + # pd_op.index_select: (2401x48xf32) <- (169x48xf32, 2401xi64) + index_select_22 = paddle._C_ops.index_select(data_46, reshape_197, 0) + del data_46 + + # pd_op.reshape: (49x49x48xf32) <- (2401x48xf32, 3xi64) + reshape_348 = paddle._C_ops.reshape(index_select_22, full_int_array_13) + + # pd_op.transpose: (48x49x49xf32) <- (49x49x48xf32) + transpose_136 = paddle._C_ops.transpose(reshape_348, [2, 0, 1]) + del reshape_348 + + # pd_op.unsqueeze: (1x48x49x49xf32) <- (48x49x49xf32, 1xi64) + unsqueeze_33 = paddle._C_ops.unsqueeze(transpose_136, full_int_array_7) + + # pd_op.add: (-1x48x49x49xf32) <- (-1x48x49x49xf32, 1x48x49x49xf32) + add_223 = paddle._C_ops.add(matmul_114, unsqueeze_33) + + # pd_op.softmax: (-1x48x49x49xf32) <- (-1x48x49x49xf32) + softmax_22 = paddle._C_ops.softmax(add_223, -1) + del add_223 + + # pd_op.matmul: (-1x48x49x32xf32) <- (-1x48x49x49xf32, -1x48x49x32xf32) + matmul_146 = paddle._C_ops.matmul(softmax_22, slice_22, False, False) + + # pd_op.transpose: (-1x49x48x32xf32) <- (-1x48x49x32xf32) + transpose_137 = paddle._C_ops.transpose(matmul_146, [0, 2, 1, 3]) + del matmul_146 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_189 = [slice_200, full_29, full_55] + del slice_200 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_186 = paddle._C_ops.stack(combine_189, 0) + del combine_189 + + # pd_op.reshape: (-1x49x1536xf32) <- (-1x49x48x32xf32, 3xi64) + reshape_198 = paddle._C_ops.reshape(transpose_137, stack_186) + del stack_186 + + # pd_op.matmul: (-1x49x1536xf32) <- (-1x49x1536xf32, 1536x1536xf32) + matmul_115 = paddle._C_ops.matmul(reshape_198, parameter_23, False, False) + del parameter_23 + + # pd_op.add: (-1x49x1536xf32) <- (-1x49x1536xf32, 1536xf32) + add_156 = paddle._C_ops.add(matmul_115, parameter_22) + del parameter_22 + + # pd_op.reshape: (-1x7x7x1536xf32) <- (-1x49x1536xf32, 4xi64) + reshape_199 = paddle._C_ops.reshape(add_156, full_int_array_43) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_45 = [-1, 1, 1, 7, 7, 1536] + + # pd_op.reshape: (-1x1x1x7x7x1536xf32) <- (-1x7x7x1536xf32, 6xi64) + reshape_349 = paddle._C_ops.reshape(reshape_199, full_int_array_45) + + # pd_op.transpose: (-1x1x7x1x7x1536xf32) <- (-1x1x1x7x7x1536xf32) + transpose_138 = paddle._C_ops.transpose(reshape_349, [0, 1, 3, 2, 4, 5]) + del reshape_349 + + # pd_op.reshape: (-1x7x7x1536xf32) <- (-1x1x7x1x7x1536xf32, 4xi64) + reshape_200 = paddle._C_ops.reshape(transpose_138, full_int_array_43) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_190 = [slice_197, full_29, full_55] + del slice_197 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_187 = paddle._C_ops.stack(combine_190, 0) + del combine_190 + + # pd_op.reshape: (-1x49x1536xf32) <- (-1x7x7x1536xf32, 3xi64) + reshape_201 = paddle._C_ops.reshape(reshape_200, stack_187) + del stack_187 + + # pd_op.full: (xf32) <- () + full_23 = paddle._C_ops.full( + [], + float("0.904348"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_257 = full_23 + + # pd_op.shape64: (3xi64) <- (-1x49x1536xf32) + shape64_133 = paddle._C_ops.shape64(reshape_201) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_203 = paddle._C_ops.slice( + shape64_133, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_133 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_191 = [slice_203, full_40, full_40] + del slice_203 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_188 = paddle._C_ops.stack(combine_191, 0) + del combine_191 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_42 = paddle._C_ops.uniform( + stack_188, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_188 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_224 = paddle._C_ops.add(full_23, uniform_42) + del uniform_42 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_42 = paddle._C_ops.floor(add_224) + del add_224 + + # pd_op.divide: (-1x49x1536xf32) <- (-1x49x1536xf32, xf32) + divide_42 = paddle._C_ops.divide(reshape_201, full_23) + + # pd_op.multiply: (-1x49x1536xf32) <- (-1x49x1536xf32, -1x1x1xf32) + multiply_42 = paddle._C_ops.multiply(divide_42, floor_42) + + # pd_op.add: (-1x49x1536xf32) <- (-1x-1x1536xf32, -1x49x1536xf32) + add_157 = paddle._C_ops.add(matmul_112, multiply_42) + + # pd_op.layer_norm: (-1x49x1536xf32, -1x49xf32, -1x49xf32) <- (-1x49x1536xf32, 1536xf32, 1536xf32) + layer_norm_147, layer_norm_148, layer_norm_149 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_157, parameter_21, parameter_20, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_20, parameter_21 + + # pd_op.matmul: (-1x49x6144xf32) <- (-1x49x1536xf32, 1536x6144xf32) + matmul_116 = paddle._C_ops.matmul(layer_norm_147, parameter_19, False, False) + del parameter_19 + + # pd_op.add: (-1x49x6144xf32) <- (-1x49x6144xf32, 6144xf32) + add_158 = paddle._C_ops.add(matmul_116, parameter_18) + del parameter_18 + + # pd_op.gelu: (-1x49x6144xf32) <- (-1x49x6144xf32) + gelu_22 = paddle._C_ops.gelu(add_158, False) + + # pd_op.matmul: (-1x49x1536xf32) <- (-1x49x6144xf32, 6144x1536xf32) + matmul_117 = paddle._C_ops.matmul(gelu_22, parameter_17, False, False) + del parameter_17 + + # pd_op.add: (-1x49x1536xf32) <- (-1x49x1536xf32, 1536xf32) + add_159 = paddle._C_ops.add(matmul_117, parameter_16) + del parameter_16 + + # pd_op.shape64: (3xi64) <- (-1x49x1536xf32) + shape64_134 = paddle._C_ops.shape64(add_159) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_204 = paddle._C_ops.slice( + shape64_134, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_134 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_192 = [slice_204, full_40, full_40] + del slice_204 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_189 = paddle._C_ops.stack(combine_192, 0) + del combine_192 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_43 = paddle._C_ops.uniform( + stack_189, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_189 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_225 = paddle._C_ops.add(full_23, uniform_43) + del uniform_43 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_43 = paddle._C_ops.floor(add_225) + del add_225 + + # pd_op.divide: (-1x49x1536xf32) <- (-1x49x1536xf32, xf32) + divide_43 = paddle._C_ops.divide(add_159, full_23) + + # pd_op.multiply: (-1x49x1536xf32) <- (-1x49x1536xf32, -1x1x1xf32) + multiply_43 = paddle._C_ops.multiply(divide_43, floor_43) + + # pd_op.add: (-1x49x1536xf32) <- (-1x49x1536xf32, -1x49x1536xf32) + add_160 = paddle._C_ops.add(add_157, multiply_43) + + # pd_op.shape64: (3xi64) <- (-1x49x1536xf32) + shape64_135 = paddle._C_ops.shape64(add_160) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_205 = paddle._C_ops.slice( + shape64_135, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_135 + + # pd_op.layer_norm: (-1x49x1536xf32, -1x49xf32, -1x49xf32) <- (-1x49x1536xf32, 1536xf32, 1536xf32) + layer_norm_150, layer_norm_151, layer_norm_152 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_160, parameter_15, parameter_14, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_14, parameter_15 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_193 = [slice_205, full_28, full_28, full_55] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_190 = paddle._C_ops.stack(combine_193, 0) + del combine_193 + + # pd_op.reshape: (-1x7x7x1536xf32) <- (-1x49x1536xf32, 4xi64) + reshape_202 = paddle._C_ops.reshape(layer_norm_150, stack_190) + del stack_190 + + # pd_op.shape64: (4xi64) <- (-1x7x7x1536xf32) + shape64_136 = paddle._C_ops.shape64(reshape_202) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_206 = paddle._C_ops.slice( + shape64_136, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_136 + + # pd_op.roll: (-1x7x7x1536xf32) <- (-1x7x7x1536xf32, 2xi64) + roll_22 = paddle._C_ops.roll(reshape_202, full_int_array_2, [1, 2]) + + # pd_op.shape64: (4xi64) <- (-1x7x7x1536xf32) + shape64_137 = paddle._C_ops.shape64(roll_22) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_207 = paddle._C_ops.slice( + shape64_137, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_137 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_194 = [slice_207, full_40, full_28, full_40, full_28, full_55] + del full_28, slice_207 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_191 = paddle._C_ops.stack(combine_194, 0) + del combine_194 + + # pd_op.reshape: (-1x1x7x1x7x1536xf32) <- (-1x7x7x1536xf32, 6xi64) + reshape_350 = paddle._C_ops.reshape(roll_22, stack_191) + del stack_191 + + # pd_op.transpose: (-1x1x1x7x7x1536xf32) <- (-1x1x7x1x7x1536xf32) + transpose_139 = paddle._C_ops.transpose(reshape_350, [0, 1, 3, 2, 4, 5]) + del reshape_350 + + # pd_op.reshape: (-1x7x7x1536xf32) <- (-1x1x1x7x7x1536xf32, 4xi64) + reshape_203 = paddle._C_ops.reshape(transpose_139, full_int_array_43) + + # pd_op.reshape: (-1x49x1536xf32) <- (-1x7x7x1536xf32, 3xi64) + reshape_204 = paddle._C_ops.reshape(reshape_203, full_int_array_44) + del full_int_array_44 + + # pd_op.full: (1x7x7x1xf32) <- () + full_74 = paddle._C_ops.full( + [1, 7, 7, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__100 = paddle._C_ops.set_value_( + full_74, + full_int_array_16, + full_int_array_17, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_74, full_int_array_16 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__101 = paddle._C_ops.set_value_( + set_value__100, + full_int_array_19, + full_int_array_20, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del full_int_array_19, set_value__100 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__102 = paddle._C_ops.set_value_( + set_value__101, + full_int_array_21, + full_int_array_22, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del full_int_array_21, full_int_array_22, set_value__101 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__103 = paddle._C_ops.set_value_( + set_value__102, + full_int_array_23, + full_int_array_24, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del full_int_array_23, set_value__102 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__104 = paddle._C_ops.set_value_( + set_value__103, + full_int_array_17, + full_int_array_2, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del full_int_array_17, set_value__103 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__105 = paddle._C_ops.set_value_( + set_value__104, + full_int_array_20, + full_int_array_25, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del full_int_array_20, full_int_array_25, set_value__104 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__106 = paddle._C_ops.set_value_( + set_value__105, + full_int_array_26, + full_int_array_27, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del full_int_array_26, full_int_array_27, set_value__105 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__107 = paddle._C_ops.set_value_( + set_value__106, + full_int_array_24, + full_int_array_28, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del full_int_array_24, full_int_array_28, set_value__106 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__11 = paddle._C_ops.set_value_( + set_value__107, + full_int_array_2, + full_int_array_29, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del full_int_array_29, set_value__107 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_46 = [1, 1, 7, 1, 7, 1] + + # pd_op.reshape: (1x1x7x1x7x1xf32) <- (1x7x7x1xf32, 6xi64) + reshape_351 = paddle._C_ops.reshape(set_value__11, full_int_array_46) + del full_int_array_46 + + # pd_op.transpose: (1x1x1x7x7x1xf32) <- (1x1x7x1x7x1xf32) + transpose_157 = paddle._C_ops.transpose(reshape_351, [0, 1, 3, 2, 4, 5]) + del reshape_351 + + # pd_op.reshape: (1x7x7x1xf32) <- (1x1x1x7x7x1xf32, 4xi64) + reshape_352 = paddle._C_ops.reshape(transpose_157, full_int_array_31) + del full_int_array_31, transpose_157 + + # pd_op.reshape: (1x49xf32) <- (1x7x7x1xf32, 2xi64) + reshape_353 = paddle._C_ops.reshape(reshape_352, full_int_array_32) + del full_int_array_32, reshape_352 + + # pd_op.unsqueeze: (1x1x49xf32) <- (1x49xf32, 1xi64) + unsqueeze_70 = paddle._C_ops.unsqueeze(reshape_353, full_int_array_8) + + # pd_op.unsqueeze: (1x49x1xf32) <- (1x49xf32, 1xi64) + unsqueeze_71 = paddle._C_ops.unsqueeze(reshape_353, full_int_array_0) + del reshape_353 + + # pd_op.subtract: (1x49x49xf32) <- (1x1x49xf32, 1x49x1xf32) + subtract_11 = paddle._C_ops.subtract(unsqueeze_70, unsqueeze_71) + del unsqueeze_70, unsqueeze_71 + + # pd_op.not_equal: (1x49x49xb) <- (1x49x49xf32, xf32) + not_equal_11 = paddle._C_ops.not_equal(subtract_11, full_35) + + # pd_op.full: (1x49x49xf32) <- () + full_75 = paddle._C_ops.full( + [1, 49, 49], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (1x49x49xf32) <- (1x49x49xb, 1x49x49xf32, 1x49x49xf32) + where_22 = paddle._C_ops.where(not_equal_11, full_75, subtract_11) + del full_75, not_equal_11, subtract_11 + + # pd_op.equal: (1x49x49xb) <- (1x49x49xf32, xf32) + equal_11 = paddle._C_ops.equal(where_22, full_35) + del full_35 + + # pd_op.full: (1x49x49xf32) <- () + full_76 = paddle._C_ops.full( + [1, 49, 49], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (1x49x49xf32) <- (1x49x49xb, 1x49x49xf32, 1x49x49xf32) + where_23 = paddle._C_ops.where(equal_11, full_76, where_22) + del equal_11, full_76, where_22 + + # pd_op.shape64: (3xi64) <- (-1x49x1536xf32) + shape64_138 = paddle._C_ops.shape64(reshape_204) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_208 = paddle._C_ops.slice( + shape64_138, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_138 + + # pd_op.matmul: (-1x49x4608xf32) <- (-1x49x1536xf32, 1536x4608xf32) + matmul_118 = paddle._C_ops.matmul(reshape_204, parameter_13, False, False) + del parameter_13 + + # pd_op.add: (-1x49x4608xf32) <- (-1x49x4608xf32, 4608xf32) + add_161 = paddle._C_ops.add(matmul_118, parameter_12) + del parameter_12 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_195 = [slice_208, full_29, full_30, full_73, full_32] + del full_30, full_32 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_192 = paddle._C_ops.stack(combine_195, 0) + del combine_195 + + # pd_op.reshape: (-1x49x3x48x32xf32) <- (-1x49x4608xf32, 5xi64) + reshape_354 = paddle._C_ops.reshape(add_161, stack_192) + del stack_192 + + # pd_op.transpose: (3x-1x48x49x32xf32) <- (-1x49x3x48x32xf32) + transpose_140 = paddle._C_ops.transpose(reshape_354, [2, 0, 3, 1, 4]) + del reshape_354 + + # pd_op.slice: (-1x48x49x32xf32) <- (3x-1x48x49x32xf32, 1xi64, 1xi64) + slice_209 = paddle._C_ops.slice( + transpose_140, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x48x49x32xf32) <- (3x-1x48x49x32xf32, 1xi64, 1xi64) + slice_210 = paddle._C_ops.slice( + transpose_140, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x48x49x32xf32) <- (3x-1x48x49x32xf32, 1xi64, 1xi64) + slice_23 = paddle._C_ops.slice( + transpose_140, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x48x49x32xf32) <- (-1x48x49x32xf32, 1xf32) + scale_23 = paddle._C_ops.scale(slice_209, full_0, float("0"), True) + del slice_209 + + # pd_op.transpose: (-1x48x32x49xf32) <- (-1x48x49x32xf32) + transpose_141 = paddle._C_ops.transpose(slice_210, [0, 1, 3, 2]) + del slice_210 + + # pd_op.matmul: (-1x48x49x49xf32) <- (-1x48x49x32xf32, -1x48x32x49xf32) + matmul_119 = paddle._C_ops.matmul(scale_23, transpose_141, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_205 = paddle._C_ops.reshape(data_47, full_int_array_12) + del data_47, full_int_array_12 + + # pd_op.index_select: (2401x48xf32) <- (169x48xf32, 2401xi64) + index_select_23 = paddle._C_ops.index_select(data_48, reshape_205, 0) + del data_48 + + # pd_op.reshape: (49x49x48xf32) <- (2401x48xf32, 3xi64) + reshape_355 = paddle._C_ops.reshape(index_select_23, full_int_array_13) + del full_int_array_13 + + # pd_op.transpose: (48x49x49xf32) <- (49x49x48xf32) + transpose_142 = paddle._C_ops.transpose(reshape_355, [2, 0, 1]) + del reshape_355 + + # pd_op.unsqueeze: (1x48x49x49xf32) <- (48x49x49xf32, 1xi64) + unsqueeze_34 = paddle._C_ops.unsqueeze(transpose_142, full_int_array_7) + + # pd_op.add: (-1x48x49x49xf32) <- (-1x48x49x49xf32, 1x48x49x49xf32) + add_162 = paddle._C_ops.add(matmul_119, unsqueeze_34) + + # pd_op.full: (xi64) <- () + full_77 = paddle._C_ops.full( + [], float("1"), paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_11 = paddle._C_ops.floor_divide(slice_208, full_77) + del full_77 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_196 = [floor_divide_11, full_40, full_73, full_29, full_29] + del floor_divide_11 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_193 = paddle._C_ops.stack(combine_196, 0) + del combine_196 + + # pd_op.reshape: (-1x1x48x49x49xf32) <- (-1x48x49x49xf32, 5xi64) + reshape_206 = paddle._C_ops.reshape(add_162, stack_193) + del stack_193 + + # pd_op.unsqueeze: (1x1x49x49xf32) <- (1x49x49xf32, 1xi64) + unsqueeze_72 = paddle._C_ops.unsqueeze(where_23, full_int_array_8) + del where_23 + + # pd_op.unsqueeze: (1x1x1x49x49xf32) <- (1x1x49x49xf32, 1xi64) + unsqueeze_35 = paddle._C_ops.unsqueeze(unsqueeze_72, full_int_array_7) + del unsqueeze_72 + + # pd_op.add: (-1x1x48x49x49xf32) <- (-1x1x48x49x49xf32, 1x1x1x49x49xf32) + add_163 = paddle._C_ops.add(reshape_206, unsqueeze_35) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_197 = [slice_208, full_73, full_29, full_29] + del full_73 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_194 = paddle._C_ops.stack(combine_197, 0) + del combine_197 + + # pd_op.reshape: (-1x48x49x49xf32) <- (-1x1x48x49x49xf32, 4xi64) + reshape_356 = paddle._C_ops.reshape(add_163, stack_194) + del stack_194 + + # pd_op.softmax: (-1x48x49x49xf32) <- (-1x48x49x49xf32) + softmax_23 = paddle._C_ops.softmax(reshape_356, -1) + del reshape_356 + + # pd_op.matmul: (-1x48x49x32xf32) <- (-1x48x49x49xf32, -1x48x49x32xf32) + matmul_147 = paddle._C_ops.matmul(softmax_23, slice_23, False, False) + + # pd_op.transpose: (-1x49x48x32xf32) <- (-1x48x49x32xf32) + transpose_143 = paddle._C_ops.transpose(matmul_147, [0, 2, 1, 3]) + del matmul_147 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_198 = [slice_208, full_29, full_55] + del slice_208 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_195 = paddle._C_ops.stack(combine_198, 0) + del combine_198 + + # pd_op.reshape: (-1x49x1536xf32) <- (-1x49x48x32xf32, 3xi64) + reshape_207 = paddle._C_ops.reshape(transpose_143, stack_195) + del stack_195 + + # pd_op.matmul: (-1x49x1536xf32) <- (-1x49x1536xf32, 1536x1536xf32) + matmul_120 = paddle._C_ops.matmul(reshape_207, parameter_11, False, False) + del parameter_11 + + # pd_op.add: (-1x49x1536xf32) <- (-1x49x1536xf32, 1536xf32) + add_164 = paddle._C_ops.add(matmul_120, parameter_10) + del parameter_10 + + # pd_op.reshape: (-1x7x7x1536xf32) <- (-1x49x1536xf32, 4xi64) + reshape_208 = paddle._C_ops.reshape(add_164, full_int_array_43) + + # pd_op.reshape: (-1x1x1x7x7x1536xf32) <- (-1x7x7x1536xf32, 6xi64) + reshape_357 = paddle._C_ops.reshape(reshape_208, full_int_array_45) + del full_int_array_45 + + # pd_op.transpose: (-1x1x7x1x7x1536xf32) <- (-1x1x1x7x7x1536xf32) + transpose_144 = paddle._C_ops.transpose(reshape_357, [0, 1, 3, 2, 4, 5]) + del reshape_357 + + # pd_op.reshape: (-1x7x7x1536xf32) <- (-1x1x7x1x7x1536xf32, 4xi64) + reshape_209 = paddle._C_ops.reshape(transpose_144, full_int_array_43) + del full_int_array_43 + + # pd_op.roll: (-1x7x7x1536xf32) <- (-1x7x7x1536xf32, 2xi64) + roll_23 = paddle._C_ops.roll(reshape_209, full_int_array_3, [1, 2]) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_199 = [slice_205, full_29, full_55] + del full_29, full_55, slice_205 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_196 = paddle._C_ops.stack(combine_199, 0) + del combine_199 + + # pd_op.reshape: (-1x49x1536xf32) <- (-1x7x7x1536xf32, 3xi64) + reshape_210 = paddle._C_ops.reshape(roll_23, stack_196) + del stack_196 + + # pd_op.full: (xf32) <- () + full_24 = paddle._C_ops.full( + [], float("0.9"), paddle.float32, paddle.framework._current_expected_place() + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_268 = full_24 + + # pd_op.shape64: (3xi64) <- (-1x49x1536xf32) + shape64_139 = paddle._C_ops.shape64(reshape_210) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_211 = paddle._C_ops.slice( + shape64_139, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_139 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_200 = [slice_211, full_40, full_40] + del slice_211 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_197 = paddle._C_ops.stack(combine_200, 0) + del combine_200 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_44 = paddle._C_ops.uniform( + stack_197, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del stack_197 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_226 = paddle._C_ops.add(full_24, uniform_44) + del uniform_44 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_44 = paddle._C_ops.floor(add_226) + del add_226 + + # pd_op.divide: (-1x49x1536xf32) <- (-1x49x1536xf32, xf32) + divide_44 = paddle._C_ops.divide(reshape_210, full_24) + + # pd_op.multiply: (-1x49x1536xf32) <- (-1x49x1536xf32, -1x1x1xf32) + multiply_44 = paddle._C_ops.multiply(divide_44, floor_44) + + # pd_op.add: (-1x49x1536xf32) <- (-1x49x1536xf32, -1x49x1536xf32) + add_165 = paddle._C_ops.add(add_160, multiply_44) + + # pd_op.layer_norm: (-1x49x1536xf32, -1x49xf32, -1x49xf32) <- (-1x49x1536xf32, 1536xf32, 1536xf32) + layer_norm_153, layer_norm_154, layer_norm_155 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_165, parameter_9, parameter_8, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_8, parameter_9 + + # pd_op.matmul: (-1x49x6144xf32) <- (-1x49x1536xf32, 1536x6144xf32) + matmul_121 = paddle._C_ops.matmul(layer_norm_153, parameter_7, False, False) + del parameter_7 + + # pd_op.add: (-1x49x6144xf32) <- (-1x49x6144xf32, 6144xf32) + add_166 = paddle._C_ops.add(matmul_121, parameter_6) + del parameter_6 + + # pd_op.gelu: (-1x49x6144xf32) <- (-1x49x6144xf32) + gelu_23 = paddle._C_ops.gelu(add_166, False) + + # pd_op.matmul: (-1x49x1536xf32) <- (-1x49x6144xf32, 6144x1536xf32) + matmul_122 = paddle._C_ops.matmul(gelu_23, parameter_5, False, False) + del parameter_5 + + # pd_op.add: (-1x49x1536xf32) <- (-1x49x1536xf32, 1536xf32) + add_167 = paddle._C_ops.add(matmul_122, parameter_4) + del parameter_4 + + # pd_op.shape64: (3xi64) <- (-1x49x1536xf32) + shape64_140 = paddle._C_ops.shape64(add_167) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_212 = paddle._C_ops.slice( + shape64_140, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del full_int_array_7, full_int_array_8, shape64_140 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_201 = [slice_212, full_40, full_40] + del full_40, slice_212 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_198 = paddle._C_ops.stack(combine_201, 0) + del combine_201 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_45 = paddle._C_ops.uniform( + stack_198, + paddle.float32, + full_41, + full_42, + 0, + paddle.framework._current_expected_place(), + ) + del full_41, full_42, stack_198 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_227 = paddle._C_ops.add(full_24, uniform_45) + del uniform_45 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_45 = paddle._C_ops.floor(add_227) + del add_227 + + # pd_op.divide: (-1x49x1536xf32) <- (-1x49x1536xf32, xf32) + divide_45 = paddle._C_ops.divide(add_167, full_24) + + # pd_op.multiply: (-1x49x1536xf32) <- (-1x49x1536xf32, -1x1x1xf32) + multiply_45 = paddle._C_ops.multiply(divide_45, floor_45) + + # pd_op.add: (-1x49x1536xf32) <- (-1x49x1536xf32, -1x49x1536xf32) + add_168 = paddle._C_ops.add(add_165, multiply_45) + + # pd_op.layer_norm: (-1x49x1536xf32, -1x49xf32, -1x49xf32) <- (-1x49x1536xf32, 1536xf32, 1536xf32) + layer_norm_158, layer_norm_156, layer_norm_157 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_168, parameter_3, parameter_2, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_2, parameter_3 + + # pd_op.transpose: (-1x1536x49xf32) <- (-1x49x1536xf32) + transpose_145 = paddle._C_ops.transpose(layer_norm_158, [0, 2, 1]) + del layer_norm_158 + + # pd_op.unsqueeze: (-1x1536x1x49xf32) <- (-1x1536x49xf32, 1xi64) + unsqueeze_36 = paddle._C_ops.unsqueeze(transpose_145, full_int_array_0) + + # pd_op.pool2d: (-1x1536x1x1xf32) <- (-1x1536x1x49xf32, 2xi64) + pool2d_0 = paddle._C_ops.pool2d( + unsqueeze_36, + full_int_array_18, + [1, 1], + [0, 0], + False, + True, + "NCHW", + "avg", + False, + True, + "EXPLICIT", + ) + del full_int_array_18 + + # pd_op.squeeze: (-1x1536x1xf32) <- (-1x1536x1x1xf32, 1xi64) + squeeze_0 = paddle._C_ops.squeeze(pool2d_0, full_int_array_0) + + # pd_op.flatten: (-1x1536xf32) <- (-1x1536x1xf32) + flatten_0 = paddle._C_ops.flatten(squeeze_0, 1, 2) + + # pd_op.matmul: (-1x102xf32) <- (-1x1536xf32, 1536x102xf32) + matmul_123 = paddle._C_ops.matmul(flatten_0, parameter_1, False, False) + del parameter_1 + + # pd_op.add: (-1x102xf32) <- (-1x102xf32, 102xf32) + add_169 = paddle._C_ops.add(matmul_123, parameter_0) + del ( + assign_0, + assign_1, + assign_10, + assign_100, + assign_101, + assign_103, + assign_105, + assign_106, + assign_107, + assign_108, + assign_109, + assign_110, + assign_111, + assign_113, + assign_114, + assign_116, + assign_117, + assign_118, + assign_119, + assign_12, + assign_120, + assign_121, + assign_123, + assign_125, + assign_126, + assign_127, + assign_128, + assign_129, + assign_130, + assign_131, + assign_133, + assign_134, + assign_136, + assign_137, + assign_138, + assign_139, + assign_14, + assign_140, + assign_141, + assign_143, + assign_145, + assign_146, + assign_147, + assign_148, + assign_149, + assign_15, + assign_150, + assign_151, + assign_153, + assign_154, + assign_156, + assign_157, + assign_158, + assign_159, + assign_16, + assign_160, + assign_161, + assign_163, + assign_165, + assign_166, + assign_167, + assign_168, + assign_169, + assign_17, + assign_170, + assign_171, + assign_173, + assign_174, + assign_176, + assign_177, + assign_178, + assign_179, + assign_18, + assign_180, + assign_181, + assign_183, + assign_185, + assign_186, + assign_187, + assign_188, + assign_189, + assign_19, + assign_190, + assign_191, + assign_193, + assign_194, + assign_196, + assign_197, + assign_198, + assign_199, + assign_2, + assign_20, + assign_200, + assign_201, + assign_203, + assign_205, + assign_206, + assign_207, + assign_208, + assign_209, + assign_21, + assign_210, + assign_211, + assign_213, + assign_214, + assign_216, + assign_217, + assign_218, + assign_219, + assign_22, + assign_220, + assign_221, + assign_223, + assign_225, + assign_226, + assign_227, + assign_228, + assign_229, + assign_23, + assign_230, + assign_231, + assign_233, + assign_234, + assign_236, + assign_237, + assign_238, + assign_239, + assign_24, + assign_240, + assign_241, + assign_242, + assign_243, + assign_244, + assign_245, + assign_246, + assign_247, + assign_249, + assign_25, + assign_250, + assign_251, + assign_252, + assign_253, + assign_254, + assign_256, + assign_258, + assign_259, + assign_26, + assign_260, + assign_261, + assign_262, + assign_263, + assign_264, + assign_266, + assign_267, + assign_269, + assign_27, + assign_270, + assign_271, + assign_28, + assign_3, + assign_30, + assign_32, + assign_33, + assign_34, + assign_35, + assign_36, + assign_37, + assign_38, + assign_4, + assign_40, + assign_41, + assign_43, + assign_44, + assign_45, + assign_46, + assign_47, + assign_48, + assign_49, + assign_5, + assign_50, + assign_51, + assign_52, + assign_53, + assign_54, + assign_56, + assign_57, + assign_58, + assign_59, + assign_6, + assign_60, + assign_61, + assign_63, + assign_65, + assign_66, + assign_67, + assign_68, + assign_69, + assign_7, + assign_70, + assign_71, + assign_73, + assign_74, + assign_76, + assign_77, + assign_78, + assign_79, + assign_8, + assign_80, + assign_81, + assign_83, + assign_85, + assign_86, + assign_87, + assign_88, + assign_89, + assign_9, + assign_90, + assign_91, + assign_93, + assign_94, + assign_96, + assign_97, + assign_98, + assign_99, + full_int_array_0, + full_int_array_1, + full_int_array_2, + full_int_array_3, + full_int_array_4, + full_int_array_5, + full_int_array_6, + parameter_0, + ) + + return ( + conv2d_0, + reshape_0, + add_0, + transpose_0, + layer_norm_0, + layer_norm_1, + layer_norm_2, + layer_norm_3, + layer_norm_4, + layer_norm_5, + reshape_1, + transpose_1, + reshape_2, + reshape_3, + matmul_0, + add_1, + transpose_2, + slice_0, + full_0, + scale_0, + transpose_3, + matmul_1, + reshape_4, + index_select_0, + transpose_4, + unsqueeze_0, + softmax_0, + transpose_5, + reshape_5, + matmul_2, + add_2, + reshape_6, + transpose_6, + reshape_7, + reshape_8, + add_3, + layer_norm_6, + layer_norm_7, + layer_norm_8, + matmul_3, + add_4, + gelu_0, + matmul_4, + add_5, + add_6, + layer_norm_9, + layer_norm_10, + layer_norm_11, + reshape_9, + roll_0, + transpose_7, + reshape_10, + reshape_11, + matmul_5, + add_7, + transpose_8, + slice_1, + assign_11, + scale_1, + transpose_9, + matmul_6, + reshape_12, + index_select_1, + transpose_10, + unsqueeze_1, + add_8, + reshape_13, + unsqueeze_2, + add_9, + softmax_1, + transpose_11, + reshape_14, + matmul_7, + add_10, + reshape_15, + transpose_12, + reshape_16, + roll_1, + reshape_17, + full_1, + floor_0, + divide_0, + multiply_0, + add_11, + layer_norm_12, + layer_norm_13, + layer_norm_14, + matmul_8, + add_12, + gelu_1, + matmul_9, + add_13, + assign_13, + floor_1, + divide_1, + multiply_1, + add_14, + reshape_18, + strided_slice_0, + strided_slice_1, + strided_slice_2, + strided_slice_3, + full_2, + concat_0, + reshape_19, + layer_norm_15, + layer_norm_16, + layer_norm_17, + matmul_10, + layer_norm_18, + layer_norm_19, + layer_norm_20, + reshape_20, + transpose_13, + reshape_21, + reshape_22, + matmul_11, + add_15, + transpose_14, + slice_2, + assign_29, + scale_2, + transpose_15, + matmul_12, + reshape_23, + index_select_2, + transpose_16, + unsqueeze_3, + softmax_2, + transpose_17, + reshape_24, + matmul_13, + add_16, + reshape_25, + transpose_18, + reshape_26, + reshape_27, + full_3, + floor_2, + divide_2, + multiply_2, + add_17, + layer_norm_21, + layer_norm_22, + layer_norm_23, + matmul_14, + add_18, + gelu_2, + matmul_15, + add_19, + assign_31, + floor_3, + divide_3, + multiply_3, + add_20, + layer_norm_24, + layer_norm_25, + layer_norm_26, + reshape_28, + roll_2, + transpose_19, + reshape_29, + reshape_30, + matmul_16, + add_21, + transpose_20, + slice_3, + assign_39, + scale_3, + transpose_21, + matmul_17, + reshape_31, + index_select_3, + transpose_22, + unsqueeze_4, + add_22, + reshape_32, + unsqueeze_5, + add_23, + softmax_3, + transpose_23, + reshape_33, + matmul_18, + add_24, + reshape_34, + transpose_24, + reshape_35, + roll_3, + reshape_36, + full_4, + floor_4, + divide_4, + multiply_4, + add_25, + layer_norm_27, + layer_norm_28, + layer_norm_29, + matmul_19, + add_26, + gelu_3, + matmul_20, + add_27, + assign_42, + floor_5, + divide_5, + multiply_5, + add_28, + reshape_37, + strided_slice_4, + strided_slice_5, + strided_slice_6, + strided_slice_7, + assign_55, + concat_1, + reshape_38, + layer_norm_30, + layer_norm_31, + layer_norm_32, + matmul_21, + layer_norm_33, + layer_norm_34, + layer_norm_35, + reshape_39, + transpose_25, + reshape_40, + reshape_41, + matmul_22, + add_29, + transpose_26, + slice_4, + assign_62, + scale_4, + transpose_27, + matmul_23, + reshape_42, + index_select_4, + transpose_28, + unsqueeze_6, + softmax_4, + transpose_29, + reshape_43, + matmul_24, + add_30, + reshape_44, + transpose_30, + reshape_45, + reshape_46, + full_5, + floor_6, + divide_6, + multiply_6, + add_31, + layer_norm_36, + layer_norm_37, + layer_norm_38, + matmul_25, + add_32, + gelu_4, + matmul_26, + add_33, + assign_64, + floor_7, + divide_7, + multiply_7, + add_34, + layer_norm_39, + layer_norm_40, + layer_norm_41, + reshape_47, + roll_4, + transpose_31, + reshape_48, + reshape_49, + matmul_27, + add_35, + transpose_32, + slice_5, + assign_72, + scale_5, + transpose_33, + matmul_28, + reshape_50, + index_select_5, + transpose_34, + unsqueeze_7, + add_36, + reshape_51, + unsqueeze_8, + add_37, + softmax_5, + transpose_35, + reshape_52, + matmul_29, + add_38, + reshape_53, + transpose_36, + reshape_54, + roll_5, + reshape_55, + full_6, + floor_8, + divide_8, + multiply_8, + add_39, + layer_norm_42, + layer_norm_43, + layer_norm_44, + matmul_30, + add_40, + gelu_5, + matmul_31, + add_41, + assign_75, + floor_9, + divide_9, + multiply_9, + add_42, + layer_norm_45, + layer_norm_46, + layer_norm_47, + reshape_56, + transpose_37, + reshape_57, + reshape_58, + matmul_32, + add_43, + transpose_38, + slice_6, + assign_82, + scale_6, + transpose_39, + matmul_33, + reshape_59, + index_select_6, + transpose_40, + unsqueeze_9, + softmax_6, + transpose_41, + reshape_60, + matmul_34, + add_44, + reshape_61, + transpose_42, + reshape_62, + reshape_63, + full_7, + floor_10, + divide_10, + multiply_10, + add_45, + layer_norm_48, + layer_norm_49, + layer_norm_50, + matmul_35, + add_46, + gelu_6, + matmul_36, + add_47, + assign_84, + floor_11, + divide_11, + multiply_11, + add_48, + layer_norm_51, + layer_norm_52, + layer_norm_53, + reshape_64, + roll_6, + transpose_43, + reshape_65, + reshape_66, + matmul_37, + add_49, + transpose_44, + slice_7, + assign_92, + scale_7, + transpose_45, + matmul_38, + reshape_67, + index_select_7, + transpose_46, + unsqueeze_10, + add_50, + reshape_68, + unsqueeze_11, + add_51, + softmax_7, + transpose_47, + reshape_69, + matmul_39, + add_52, + reshape_70, + transpose_48, + reshape_71, + roll_7, + reshape_72, + full_8, + floor_12, + divide_12, + multiply_12, + add_53, + layer_norm_54, + layer_norm_55, + layer_norm_56, + matmul_40, + add_54, + gelu_7, + matmul_41, + add_55, + assign_95, + floor_13, + divide_13, + multiply_13, + add_56, + layer_norm_57, + layer_norm_58, + layer_norm_59, + reshape_73, + transpose_49, + reshape_74, + reshape_75, + matmul_42, + add_57, + transpose_50, + slice_8, + assign_102, + scale_8, + transpose_51, + matmul_43, + reshape_76, + index_select_8, + transpose_52, + unsqueeze_12, + softmax_8, + transpose_53, + reshape_77, + matmul_44, + add_58, + reshape_78, + transpose_54, + reshape_79, + reshape_80, + full_9, + floor_14, + divide_14, + multiply_14, + add_59, + layer_norm_60, + layer_norm_61, + layer_norm_62, + matmul_45, + add_60, + gelu_8, + matmul_46, + add_61, + assign_104, + floor_15, + divide_15, + multiply_15, + add_62, + layer_norm_63, + layer_norm_64, + layer_norm_65, + reshape_81, + roll_8, + transpose_55, + reshape_82, + reshape_83, + matmul_47, + add_63, + transpose_56, + slice_9, + assign_112, + scale_9, + transpose_57, + matmul_48, + reshape_84, + index_select_9, + transpose_58, + unsqueeze_13, + add_64, + reshape_85, + unsqueeze_14, + add_65, + softmax_9, + transpose_59, + reshape_86, + matmul_49, + add_66, + reshape_87, + transpose_60, + reshape_88, + roll_9, + reshape_89, + full_10, + floor_16, + divide_16, + multiply_16, + add_67, + layer_norm_66, + layer_norm_67, + layer_norm_68, + matmul_50, + add_68, + gelu_9, + matmul_51, + add_69, + assign_115, + floor_17, + divide_17, + multiply_17, + add_70, + layer_norm_69, + layer_norm_70, + layer_norm_71, + reshape_90, + transpose_61, + reshape_91, + reshape_92, + matmul_52, + add_71, + transpose_62, + slice_10, + assign_122, + scale_10, + transpose_63, + matmul_53, + reshape_93, + index_select_10, + transpose_64, + unsqueeze_15, + softmax_10, + transpose_65, + reshape_94, + matmul_54, + add_72, + reshape_95, + transpose_66, + reshape_96, + reshape_97, + full_11, + floor_18, + divide_18, + multiply_18, + add_73, + layer_norm_72, + layer_norm_73, + layer_norm_74, + matmul_55, + add_74, + gelu_10, + matmul_56, + add_75, + assign_124, + floor_19, + divide_19, + multiply_19, + add_76, + layer_norm_75, + layer_norm_76, + layer_norm_77, + reshape_98, + roll_10, + transpose_67, + reshape_99, + reshape_100, + matmul_57, + add_77, + transpose_68, + slice_11, + assign_132, + scale_11, + transpose_69, + matmul_58, + reshape_101, + index_select_11, + transpose_70, + unsqueeze_16, + add_78, + reshape_102, + unsqueeze_17, + add_79, + softmax_11, + transpose_71, + reshape_103, + matmul_59, + add_80, + reshape_104, + transpose_72, + reshape_105, + roll_11, + reshape_106, + full_12, + floor_20, + divide_20, + multiply_20, + add_81, + layer_norm_78, + layer_norm_79, + layer_norm_80, + matmul_60, + add_82, + gelu_11, + matmul_61, + add_83, + assign_135, + floor_21, + divide_21, + multiply_21, + add_84, + layer_norm_81, + layer_norm_82, + layer_norm_83, + reshape_107, + transpose_73, + reshape_108, + reshape_109, + matmul_62, + add_85, + transpose_74, + slice_12, + assign_142, + scale_12, + transpose_75, + matmul_63, + reshape_110, + index_select_12, + transpose_76, + unsqueeze_18, + softmax_12, + transpose_77, + reshape_111, + matmul_64, + add_86, + reshape_112, + transpose_78, + reshape_113, + reshape_114, + full_13, + floor_22, + divide_22, + multiply_22, + add_87, + layer_norm_84, + layer_norm_85, + layer_norm_86, + matmul_65, + add_88, + gelu_12, + matmul_66, + add_89, + assign_144, + floor_23, + divide_23, + multiply_23, + add_90, + layer_norm_87, + layer_norm_88, + layer_norm_89, + reshape_115, + roll_12, + transpose_79, + reshape_116, + reshape_117, + matmul_67, + add_91, + transpose_80, + slice_13, + assign_152, + scale_13, + transpose_81, + matmul_68, + reshape_118, + index_select_13, + transpose_82, + unsqueeze_19, + add_92, + reshape_119, + unsqueeze_20, + add_93, + softmax_13, + transpose_83, + reshape_120, + matmul_69, + add_94, + reshape_121, + transpose_84, + reshape_122, + roll_13, + reshape_123, + full_14, + floor_24, + divide_24, + multiply_24, + add_95, + layer_norm_90, + layer_norm_91, + layer_norm_92, + matmul_70, + add_96, + gelu_13, + matmul_71, + add_97, + assign_155, + floor_25, + divide_25, + multiply_25, + add_98, + layer_norm_93, + layer_norm_94, + layer_norm_95, + reshape_124, + transpose_85, + reshape_125, + reshape_126, + matmul_72, + add_99, + transpose_86, + slice_14, + assign_162, + scale_14, + transpose_87, + matmul_73, + reshape_127, + index_select_14, + transpose_88, + unsqueeze_21, + softmax_14, + transpose_89, + reshape_128, + matmul_74, + add_100, + reshape_129, + transpose_90, + reshape_130, + reshape_131, + full_15, + floor_26, + divide_26, + multiply_26, + add_101, + layer_norm_96, + layer_norm_97, + layer_norm_98, + matmul_75, + add_102, + gelu_14, + matmul_76, + add_103, + assign_164, + floor_27, + divide_27, + multiply_27, + add_104, + layer_norm_99, + layer_norm_100, + layer_norm_101, + reshape_132, + roll_14, + transpose_91, + reshape_133, + reshape_134, + matmul_77, + add_105, + transpose_92, + slice_15, + assign_172, + scale_15, + transpose_93, + matmul_78, + reshape_135, + index_select_15, + transpose_94, + unsqueeze_22, + add_106, + reshape_136, + unsqueeze_23, + add_107, + softmax_15, + transpose_95, + reshape_137, + matmul_79, + add_108, + reshape_138, + transpose_96, + reshape_139, + roll_15, + reshape_140, + full_16, + floor_28, + divide_28, + multiply_28, + add_109, + layer_norm_102, + layer_norm_103, + layer_norm_104, + matmul_80, + add_110, + gelu_15, + matmul_81, + add_111, + assign_175, + floor_29, + divide_29, + multiply_29, + add_112, + layer_norm_105, + layer_norm_106, + layer_norm_107, + reshape_141, + transpose_97, + reshape_142, + reshape_143, + matmul_82, + add_113, + transpose_98, + slice_16, + assign_182, + scale_16, + transpose_99, + matmul_83, + reshape_144, + index_select_16, + transpose_100, + unsqueeze_24, + softmax_16, + transpose_101, + reshape_145, + matmul_84, + add_114, + reshape_146, + transpose_102, + reshape_147, + reshape_148, + full_17, + floor_30, + divide_30, + multiply_30, + add_115, + layer_norm_108, + layer_norm_109, + layer_norm_110, + matmul_85, + add_116, + gelu_16, + matmul_86, + add_117, + assign_184, + floor_31, + divide_31, + multiply_31, + add_118, + layer_norm_111, + layer_norm_112, + layer_norm_113, + reshape_149, + roll_16, + transpose_103, + reshape_150, + reshape_151, + matmul_87, + add_119, + transpose_104, + slice_17, + assign_192, + scale_17, + transpose_105, + matmul_88, + reshape_152, + index_select_17, + transpose_106, + unsqueeze_25, + add_120, + reshape_153, + unsqueeze_26, + add_121, + softmax_17, + transpose_107, + reshape_154, + matmul_89, + add_122, + reshape_155, + transpose_108, + reshape_156, + roll_17, + reshape_157, + full_18, + floor_32, + divide_32, + multiply_32, + add_123, + layer_norm_114, + layer_norm_115, + layer_norm_116, + matmul_90, + add_124, + gelu_17, + matmul_91, + add_125, + assign_195, + floor_33, + divide_33, + multiply_33, + add_126, + layer_norm_117, + layer_norm_118, + layer_norm_119, + reshape_158, + transpose_109, + reshape_159, + reshape_160, + matmul_92, + add_127, + transpose_110, + slice_18, + assign_202, + scale_18, + transpose_111, + matmul_93, + reshape_161, + index_select_18, + transpose_112, + unsqueeze_27, + softmax_18, + transpose_113, + reshape_162, + matmul_94, + add_128, + reshape_163, + transpose_114, + reshape_164, + reshape_165, + full_19, + floor_34, + divide_34, + multiply_34, + add_129, + layer_norm_120, + layer_norm_121, + layer_norm_122, + matmul_95, + add_130, + gelu_18, + matmul_96, + add_131, + assign_204, + floor_35, + divide_35, + multiply_35, + add_132, + layer_norm_123, + layer_norm_124, + layer_norm_125, + reshape_166, + roll_18, + transpose_115, + reshape_167, + reshape_168, + matmul_97, + add_133, + transpose_116, + slice_19, + assign_212, + scale_19, + transpose_117, + matmul_98, + reshape_169, + index_select_19, + transpose_118, + unsqueeze_28, + add_134, + reshape_170, + unsqueeze_29, + add_135, + softmax_19, + transpose_119, + reshape_171, + matmul_99, + add_136, + reshape_172, + transpose_120, + reshape_173, + roll_19, + reshape_174, + full_20, + floor_36, + divide_36, + multiply_36, + add_137, + layer_norm_126, + layer_norm_127, + layer_norm_128, + matmul_100, + add_138, + gelu_19, + matmul_101, + add_139, + assign_215, + floor_37, + divide_37, + multiply_37, + add_140, + layer_norm_129, + layer_norm_130, + layer_norm_131, + reshape_175, + transpose_121, + reshape_176, + reshape_177, + matmul_102, + add_141, + transpose_122, + slice_20, + assign_222, + scale_20, + transpose_123, + matmul_103, + reshape_178, + index_select_20, + transpose_124, + unsqueeze_30, + softmax_20, + transpose_125, + reshape_179, + matmul_104, + add_142, + reshape_180, + transpose_126, + reshape_181, + reshape_182, + full_21, + floor_38, + divide_38, + multiply_38, + add_143, + layer_norm_132, + layer_norm_133, + layer_norm_134, + matmul_105, + add_144, + gelu_20, + matmul_106, + add_145, + assign_224, + floor_39, + divide_39, + multiply_39, + add_146, + layer_norm_135, + layer_norm_136, + layer_norm_137, + reshape_183, + roll_20, + transpose_127, + reshape_184, + reshape_185, + matmul_107, + add_147, + transpose_128, + slice_21, + assign_232, + scale_21, + transpose_129, + matmul_108, + reshape_186, + index_select_21, + transpose_130, + unsqueeze_31, + add_148, + reshape_187, + unsqueeze_32, + add_149, + softmax_21, + transpose_131, + reshape_188, + matmul_109, + add_150, + reshape_189, + transpose_132, + reshape_190, + roll_21, + reshape_191, + full_22, + floor_40, + divide_40, + multiply_40, + add_151, + layer_norm_138, + layer_norm_139, + layer_norm_140, + matmul_110, + add_152, + gelu_21, + matmul_111, + add_153, + assign_235, + floor_41, + divide_41, + multiply_41, + add_154, + reshape_192, + strided_slice_8, + strided_slice_9, + strided_slice_10, + strided_slice_11, + assign_248, + concat_2, + reshape_193, + layer_norm_141, + layer_norm_142, + layer_norm_143, + matmul_112, + layer_norm_144, + layer_norm_145, + layer_norm_146, + reshape_194, + transpose_133, + reshape_195, + reshape_196, + matmul_113, + add_155, + transpose_134, + slice_22, + assign_255, + scale_22, + transpose_135, + matmul_114, + reshape_197, + index_select_22, + transpose_136, + unsqueeze_33, + softmax_22, + transpose_137, + reshape_198, + matmul_115, + add_156, + reshape_199, + transpose_138, + reshape_200, + reshape_201, + full_23, + floor_42, + divide_42, + multiply_42, + add_157, + layer_norm_147, + layer_norm_148, + layer_norm_149, + matmul_116, + add_158, + gelu_22, + matmul_117, + add_159, + assign_257, + floor_43, + divide_43, + multiply_43, + add_160, + layer_norm_150, + layer_norm_151, + layer_norm_152, + reshape_202, + roll_22, + transpose_139, + reshape_203, + reshape_204, + matmul_118, + add_161, + transpose_140, + slice_23, + assign_265, + scale_23, + transpose_141, + matmul_119, + reshape_205, + index_select_23, + transpose_142, + unsqueeze_34, + add_162, + reshape_206, + unsqueeze_35, + add_163, + softmax_23, + transpose_143, + reshape_207, + matmul_120, + add_164, + reshape_208, + transpose_144, + reshape_209, + roll_23, + reshape_210, + full_24, + floor_44, + divide_44, + multiply_44, + add_165, + layer_norm_153, + layer_norm_154, + layer_norm_155, + matmul_121, + add_166, + gelu_23, + matmul_122, + add_167, + assign_268, + floor_45, + divide_45, + multiply_45, + add_168, + layer_norm_156, + layer_norm_157, + transpose_145, + unsqueeze_36, + pool2d_0, + squeeze_0, + flatten_0, + matmul_123, + add_169, + set_value__0, + set_value__1, + set_value__2, + set_value__3, + set_value__4, + set_value__5, + set_value__6, + set_value__7, + set_value__8, + set_value__9, + set_value__10, + set_value__11, + ) diff --git a/paddle_samples/vision-model/SwinTransformer_large_patch4_window7_224/subgraph_2/weight_meta.py b/paddle_samples/vision-model/SwinTransformer_large_patch4_window7_224/subgraph_2/weight_meta.py new file mode 100644 index 00000000..88a6a1a4 --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_large_patch4_window7_224/subgraph_2/weight_meta.py @@ -0,0 +1,2743 @@ +class Program_weight_tensor_parameter_0: + name = "parameter_0" + shape = [102] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_1: + name = "parameter_1" + shape = [1536, 102] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_2: + name = "parameter_2" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_3: + name = "parameter_3" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_4: + name = "parameter_4" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_5: + name = "parameter_5" + shape = [6144, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_6: + name = "parameter_6" + shape = [6144] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_7: + name = "parameter_7" + shape = [1536, 6144] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_8: + name = "parameter_8" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_9: + name = "parameter_9" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_10: + name = "parameter_10" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_11: + name = "parameter_11" + shape = [1536, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_12: + name = "parameter_12" + shape = [4608] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_13: + name = "parameter_13" + shape = [1536, 4608] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_14: + name = "parameter_14" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_15: + name = "parameter_15" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_16: + name = "parameter_16" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_17: + name = "parameter_17" + shape = [6144, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_18: + name = "parameter_18" + shape = [6144] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_19: + name = "parameter_19" + shape = [1536, 6144] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_20: + name = "parameter_20" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_21: + name = "parameter_21" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_22: + name = "parameter_22" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_23: + name = "parameter_23" + shape = [1536, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_24: + name = "parameter_24" + shape = [4608] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_25: + name = "parameter_25" + shape = [1536, 4608] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_26: + name = "parameter_26" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_27: + name = "parameter_27" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_28: + name = "parameter_28" + shape = [3072, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_29: + name = "parameter_29" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_30: + name = "parameter_30" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_31: + name = "parameter_31" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_32: + name = "parameter_32" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_33: + name = "parameter_33" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_34: + name = "parameter_34" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_35: + name = "parameter_35" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_36: + name = "parameter_36" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_37: + name = "parameter_37" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_38: + name = "parameter_38" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_39: + name = "parameter_39" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_40: + name = "parameter_40" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_41: + name = "parameter_41" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_42: + name = "parameter_42" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_43: + name = "parameter_43" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_44: + name = "parameter_44" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_45: + name = "parameter_45" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_46: + name = "parameter_46" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_47: + name = "parameter_47" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_48: + name = "parameter_48" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_49: + name = "parameter_49" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_50: + name = "parameter_50" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_51: + name = "parameter_51" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_52: + name = "parameter_52" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_53: + name = "parameter_53" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_54: + name = "parameter_54" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_55: + name = "parameter_55" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_56: + name = "parameter_56" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_57: + name = "parameter_57" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_58: + name = "parameter_58" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_59: + name = "parameter_59" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_60: + name = "parameter_60" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_61: + name = "parameter_61" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_62: + name = "parameter_62" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_63: + name = "parameter_63" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_64: + name = "parameter_64" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_65: + name = "parameter_65" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_66: + name = "parameter_66" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_67: + name = "parameter_67" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_68: + name = "parameter_68" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_69: + name = "parameter_69" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_70: + name = "parameter_70" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_71: + name = "parameter_71" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_72: + name = "parameter_72" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_73: + name = "parameter_73" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_74: + name = "parameter_74" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_75: + name = "parameter_75" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_76: + name = "parameter_76" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_77: + name = "parameter_77" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_78: + name = "parameter_78" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_79: + name = "parameter_79" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_80: + name = "parameter_80" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_81: + name = "parameter_81" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_82: + name = "parameter_82" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_83: + name = "parameter_83" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_84: + name = "parameter_84" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_85: + name = "parameter_85" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_86: + name = "parameter_86" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_87: + name = "parameter_87" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_88: + name = "parameter_88" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_89: + name = "parameter_89" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_90: + name = "parameter_90" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_91: + name = "parameter_91" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_92: + name = "parameter_92" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_93: + name = "parameter_93" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_94: + name = "parameter_94" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_95: + name = "parameter_95" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_96: + name = "parameter_96" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_97: + name = "parameter_97" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_98: + name = "parameter_98" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_99: + name = "parameter_99" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_100: + name = "parameter_100" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_101: + name = "parameter_101" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_102: + name = "parameter_102" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_103: + name = "parameter_103" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_104: + name = "parameter_104" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_105: + name = "parameter_105" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_106: + name = "parameter_106" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_107: + name = "parameter_107" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_108: + name = "parameter_108" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_109: + name = "parameter_109" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_110: + name = "parameter_110" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_111: + name = "parameter_111" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_112: + name = "parameter_112" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_113: + name = "parameter_113" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_114: + name = "parameter_114" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_115: + name = "parameter_115" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_116: + name = "parameter_116" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_117: + name = "parameter_117" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_118: + name = "parameter_118" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_119: + name = "parameter_119" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_120: + name = "parameter_120" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_121: + name = "parameter_121" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_122: + name = "parameter_122" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_123: + name = "parameter_123" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_124: + name = "parameter_124" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_125: + name = "parameter_125" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_126: + name = "parameter_126" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_127: + name = "parameter_127" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_128: + name = "parameter_128" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_129: + name = "parameter_129" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_130: + name = "parameter_130" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_131: + name = "parameter_131" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_132: + name = "parameter_132" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_133: + name = "parameter_133" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_134: + name = "parameter_134" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_135: + name = "parameter_135" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_136: + name = "parameter_136" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_137: + name = "parameter_137" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_138: + name = "parameter_138" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_139: + name = "parameter_139" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_140: + name = "parameter_140" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_141: + name = "parameter_141" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_142: + name = "parameter_142" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_143: + name = "parameter_143" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_144: + name = "parameter_144" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_145: + name = "parameter_145" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_146: + name = "parameter_146" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_147: + name = "parameter_147" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_148: + name = "parameter_148" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_149: + name = "parameter_149" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_150: + name = "parameter_150" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_151: + name = "parameter_151" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_152: + name = "parameter_152" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_153: + name = "parameter_153" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_154: + name = "parameter_154" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_155: + name = "parameter_155" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_156: + name = "parameter_156" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_157: + name = "parameter_157" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_158: + name = "parameter_158" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_159: + name = "parameter_159" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_160: + name = "parameter_160" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_161: + name = "parameter_161" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_162: + name = "parameter_162" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_163: + name = "parameter_163" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_164: + name = "parameter_164" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_165: + name = "parameter_165" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_166: + name = "parameter_166" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_167: + name = "parameter_167" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_168: + name = "parameter_168" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_169: + name = "parameter_169" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_170: + name = "parameter_170" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_171: + name = "parameter_171" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_172: + name = "parameter_172" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_173: + name = "parameter_173" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_174: + name = "parameter_174" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_175: + name = "parameter_175" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_176: + name = "parameter_176" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_177: + name = "parameter_177" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_178: + name = "parameter_178" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_179: + name = "parameter_179" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_180: + name = "parameter_180" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_181: + name = "parameter_181" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_182: + name = "parameter_182" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_183: + name = "parameter_183" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_184: + name = "parameter_184" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_185: + name = "parameter_185" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_186: + name = "parameter_186" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_187: + name = "parameter_187" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_188: + name = "parameter_188" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_189: + name = "parameter_189" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_190: + name = "parameter_190" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_191: + name = "parameter_191" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_192: + name = "parameter_192" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_193: + name = "parameter_193" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_194: + name = "parameter_194" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_195: + name = "parameter_195" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_196: + name = "parameter_196" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_197: + name = "parameter_197" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_198: + name = "parameter_198" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_199: + name = "parameter_199" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_200: + name = "parameter_200" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_201: + name = "parameter_201" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_202: + name = "parameter_202" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_203: + name = "parameter_203" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_204: + name = "parameter_204" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_205: + name = "parameter_205" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_206: + name = "parameter_206" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_207: + name = "parameter_207" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_208: + name = "parameter_208" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_209: + name = "parameter_209" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_210: + name = "parameter_210" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_211: + name = "parameter_211" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_212: + name = "parameter_212" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_213: + name = "parameter_213" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_214: + name = "parameter_214" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_215: + name = "parameter_215" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_216: + name = "parameter_216" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_217: + name = "parameter_217" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_218: + name = "parameter_218" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_219: + name = "parameter_219" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_220: + name = "parameter_220" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_221: + name = "parameter_221" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_222: + name = "parameter_222" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_223: + name = "parameter_223" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_224: + name = "parameter_224" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_225: + name = "parameter_225" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_226: + name = "parameter_226" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_227: + name = "parameter_227" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_228: + name = "parameter_228" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_229: + name = "parameter_229" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_230: + name = "parameter_230" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_231: + name = "parameter_231" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_232: + name = "parameter_232" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_233: + name = "parameter_233" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_234: + name = "parameter_234" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_235: + name = "parameter_235" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_236: + name = "parameter_236" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_237: + name = "parameter_237" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_238: + name = "parameter_238" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_239: + name = "parameter_239" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_240: + name = "parameter_240" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_241: + name = "parameter_241" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_242: + name = "parameter_242" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_243: + name = "parameter_243" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_244: + name = "parameter_244" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_245: + name = "parameter_245" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_246: + name = "parameter_246" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_247: + name = "parameter_247" + shape = [1536, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_248: + name = "parameter_248" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_249: + name = "parameter_249" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_250: + name = "parameter_250" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_251: + name = "parameter_251" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_252: + name = "parameter_252" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_253: + name = "parameter_253" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_254: + name = "parameter_254" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_255: + name = "parameter_255" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_256: + name = "parameter_256" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_257: + name = "parameter_257" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_258: + name = "parameter_258" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_259: + name = "parameter_259" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_260: + name = "parameter_260" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_261: + name = "parameter_261" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_262: + name = "parameter_262" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_263: + name = "parameter_263" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_264: + name = "parameter_264" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_265: + name = "parameter_265" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_266: + name = "parameter_266" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_267: + name = "parameter_267" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_268: + name = "parameter_268" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_269: + name = "parameter_269" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_270: + name = "parameter_270" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_271: + name = "parameter_271" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_272: + name = "parameter_272" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_273: + name = "parameter_273" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_274: + name = "parameter_274" + shape = [768, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_275: + name = "parameter_275" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_276: + name = "parameter_276" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_277: + name = "parameter_277" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_278: + name = "parameter_278" + shape = [768, 192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_279: + name = "parameter_279" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_280: + name = "parameter_280" + shape = [192, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_281: + name = "parameter_281" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_282: + name = "parameter_282" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_283: + name = "parameter_283" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_284: + name = "parameter_284" + shape = [192, 192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_285: + name = "parameter_285" + shape = [576] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_286: + name = "parameter_286" + shape = [192, 576] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_287: + name = "parameter_287" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_288: + name = "parameter_288" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_289: + name = "parameter_289" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_290: + name = "parameter_290" + shape = [768, 192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_291: + name = "parameter_291" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_292: + name = "parameter_292" + shape = [192, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_293: + name = "parameter_293" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_294: + name = "parameter_294" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_295: + name = "parameter_295" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_296: + name = "parameter_296" + shape = [192, 192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_297: + name = "parameter_297" + shape = [576] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_298: + name = "parameter_298" + shape = [192, 576] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_299: + name = "parameter_299" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_300: + name = "parameter_300" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_301: + name = "parameter_301" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_302: + name = "parameter_302" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_303: + name = "parameter_303" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_304: + name = "parameter_304" + shape = [192, 3, 4, 4] + dtype = "float32" + low = 0 + high = 0.5 + data = None diff --git a/paddle_samples/vision-model/SwinTransformer_small_patch4_window7_224/subgraph_0/graph_hash.txt b/paddle_samples/vision-model/SwinTransformer_small_patch4_window7_224/subgraph_0/graph_hash.txt new file mode 100644 index 00000000..678a18a1 --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_small_patch4_window7_224/subgraph_0/graph_hash.txt @@ -0,0 +1 @@ +b30ca9883649d991bc8c4ba04da935a9b4b73a36613bcc8529ef9a0a2fb8b1d3 \ No newline at end of file diff --git a/paddle_samples/vision-model/SwinTransformer_small_patch4_window7_224/subgraph_0/graph_net.json b/paddle_samples/vision-model/SwinTransformer_small_patch4_window7_224/subgraph_0/graph_net.json new file mode 100644 index 00000000..368ac6fc --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_small_patch4_window7_224/subgraph_0/graph_net.json @@ -0,0 +1,5 @@ +{ + "framework": "paddle", + "num_devices_required": 1, + "num_nodes_required": 1 +} \ No newline at end of file diff --git a/paddle_samples/vision-model/SwinTransformer_small_patch4_window7_224/subgraph_0/input_meta.py b/paddle_samples/vision-model/SwinTransformer_small_patch4_window7_224/subgraph_0/input_meta.py new file mode 100644 index 00000000..a92f4145 --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_small_patch4_window7_224/subgraph_0/input_meta.py @@ -0,0 +1,439 @@ +class Program_weight_tensor_data_0: + name = "data_0" + shape = [64, 3, 224, 224] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_1: + name = "data_1" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_2: + name = "data_2" + shape = [169, 12] + dtype = "float32" + low = -9.32025 + high = 3.19265 + data = None + + +class Program_weight_tensor_data_3: + name = "data_3" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_4: + name = "data_4" + shape = [169, 12] + dtype = "float32" + low = -4.72291 + high = 3.60727 + data = None + + +class Program_weight_tensor_data_5: + name = "data_5" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_6: + name = "data_6" + shape = [169, 12] + dtype = "float32" + low = -9.57262 + high = 2.48781 + data = None + + +class Program_weight_tensor_data_7: + name = "data_7" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_8: + name = "data_8" + shape = [169, 12] + dtype = "float32" + low = -5.86546 + high = 2.53926 + data = None + + +class Program_weight_tensor_data_9: + name = "data_9" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_10: + name = "data_10" + shape = [169, 12] + dtype = "float32" + low = -11.2008 + high = 1.83809 + data = None + + +class Program_weight_tensor_data_11: + name = "data_11" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_12: + name = "data_12" + shape = [169, 12] + dtype = "float32" + low = -11.4129 + high = 2.56733 + data = None + + +class Program_weight_tensor_data_13: + name = "data_13" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_14: + name = "data_14" + shape = [169, 12] + dtype = "float32" + low = -12.6651 + high = 1.55338 + data = None + + +class Program_weight_tensor_data_15: + name = "data_15" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_16: + name = "data_16" + shape = [169, 12] + dtype = "float32" + low = -6.02371 + high = 2.94348 + data = None + + +class Program_weight_tensor_data_17: + name = "data_17" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_18: + name = "data_18" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_19: + name = "data_19" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_20: + name = "data_20" + shape = [169, 6] + dtype = "float32" + low = -4.59034 + high = 7.89396 + data = None + + +class Program_weight_tensor_data_21: + name = "data_21" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_22: + name = "data_22" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_23: + name = "data_23" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_24: + name = "data_24" + shape = [169, 3] + dtype = "float32" + low = -10.8562 + high = 3.30781 + data = None + + +class Program_weight_tensor_data_25: + name = "data_25" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_26: + name = "data_26" + shape = [169, 6] + dtype = "float32" + low = -4.7555 + high = 5.78708 + data = None + + +class Program_weight_tensor_data_27: + name = "data_27" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_28: + name = "data_28" + shape = [169, 12] + dtype = "float32" + low = -8.4496 + high = 4.77939 + data = None + + +class Program_weight_tensor_data_29: + name = "data_29" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_30: + name = "data_30" + shape = [169, 12] + dtype = "float32" + low = -11.1686 + high = 10.3609 + data = None + + +class Program_weight_tensor_data_31: + name = "data_31" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_32: + name = "data_32" + shape = [169, 12] + dtype = "float32" + low = -9.9087 + high = 4.3333 + data = None + + +class Program_weight_tensor_data_33: + name = "data_33" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_34: + name = "data_34" + shape = [169, 12] + dtype = "float32" + low = -8.18147 + high = 3.88861 + data = None + + +class Program_weight_tensor_data_35: + name = "data_35" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_36: + name = "data_36" + shape = [169, 12] + dtype = "float32" + low = -5.04052 + high = 3.20353 + data = None + + +class Program_weight_tensor_data_37: + name = "data_37" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_38: + name = "data_38" + shape = [169, 12] + dtype = "float32" + low = -6.80778 + high = 4.11249 + data = None + + +class Program_weight_tensor_data_39: + name = "data_39" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_40: + name = "data_40" + shape = [169, 12] + dtype = "float32" + low = -10.131 + high = 4.64307 + data = None + + +class Program_weight_tensor_data_41: + name = "data_41" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_42: + name = "data_42" + shape = [169, 12] + dtype = "float32" + low = -5.80667 + high = 3.5609 + data = None + + +class Program_weight_tensor_data_43: + name = "data_43" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_44: + name = "data_44" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_45: + name = "data_45" + shape = [169, 12] + dtype = "float32" + low = -10.4214 + high = 2.89938 + data = None + + +class Program_weight_tensor_data_46: + name = "data_46" + shape = [169, 3] + dtype = "float32" + low = -10.721 + high = 3.21823 + data = None + + +class Program_weight_tensor_data_47: + name = "data_47" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_48: + name = "data_48" + shape = [169, 12] + dtype = "float32" + low = -8.23036 + high = 3.1619 + data = None diff --git a/paddle_samples/vision-model/SwinTransformer_small_patch4_window7_224/subgraph_0/model.py b/paddle_samples/vision-model/SwinTransformer_small_patch4_window7_224/subgraph_0/model.py new file mode 100644 index 00000000..7287bf25 --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_small_patch4_window7_224/subgraph_0/model.py @@ -0,0 +1,10708 @@ +import paddle + + +class GraphModule(paddle.nn.Layer): + def __init__(self): + super().__init__() + + def forward( + self, + parameter_0, + parameter_1, + parameter_2, + parameter_3, + parameter_4, + parameter_5, + parameter_6, + parameter_7, + parameter_8, + parameter_9, + parameter_10, + parameter_11, + parameter_12, + parameter_13, + parameter_14, + parameter_15, + parameter_16, + parameter_17, + parameter_18, + parameter_19, + parameter_20, + parameter_21, + parameter_22, + parameter_23, + parameter_24, + parameter_25, + parameter_26, + parameter_27, + parameter_28, + parameter_29, + parameter_30, + parameter_31, + parameter_32, + parameter_33, + parameter_34, + parameter_35, + parameter_36, + parameter_37, + parameter_38, + parameter_39, + parameter_40, + parameter_41, + parameter_42, + parameter_43, + parameter_44, + parameter_45, + parameter_46, + parameter_47, + parameter_48, + parameter_49, + parameter_50, + parameter_51, + parameter_52, + parameter_53, + parameter_54, + parameter_55, + parameter_56, + parameter_57, + parameter_58, + parameter_59, + parameter_60, + parameter_61, + parameter_62, + parameter_63, + parameter_64, + parameter_65, + parameter_66, + parameter_67, + parameter_68, + parameter_69, + parameter_70, + parameter_71, + parameter_72, + parameter_73, + parameter_74, + parameter_75, + parameter_76, + parameter_77, + parameter_78, + parameter_79, + parameter_80, + parameter_81, + parameter_82, + parameter_83, + parameter_84, + parameter_85, + parameter_86, + parameter_87, + parameter_88, + parameter_89, + parameter_90, + parameter_91, + parameter_92, + parameter_93, + parameter_94, + parameter_95, + parameter_96, + parameter_97, + parameter_98, + parameter_99, + parameter_100, + parameter_101, + parameter_102, + parameter_103, + parameter_104, + parameter_105, + parameter_106, + parameter_107, + parameter_108, + parameter_109, + parameter_110, + parameter_111, + parameter_112, + parameter_113, + parameter_114, + parameter_115, + parameter_116, + parameter_117, + parameter_118, + parameter_119, + parameter_120, + parameter_121, + parameter_122, + parameter_123, + parameter_124, + parameter_125, + parameter_126, + parameter_127, + parameter_128, + parameter_129, + parameter_130, + parameter_131, + parameter_132, + parameter_133, + parameter_134, + parameter_135, + parameter_136, + parameter_137, + parameter_138, + parameter_139, + parameter_140, + parameter_141, + parameter_142, + parameter_143, + parameter_144, + parameter_145, + parameter_146, + parameter_147, + parameter_148, + parameter_149, + parameter_150, + parameter_151, + parameter_152, + parameter_153, + parameter_154, + parameter_155, + parameter_156, + parameter_157, + parameter_158, + parameter_159, + parameter_160, + parameter_161, + parameter_162, + parameter_163, + parameter_164, + parameter_165, + parameter_166, + parameter_167, + parameter_168, + parameter_169, + parameter_170, + parameter_171, + parameter_172, + parameter_173, + parameter_174, + parameter_175, + parameter_176, + parameter_177, + parameter_178, + parameter_179, + parameter_180, + parameter_181, + parameter_182, + parameter_183, + parameter_184, + parameter_185, + parameter_186, + parameter_187, + parameter_188, + parameter_189, + parameter_190, + parameter_191, + parameter_192, + parameter_193, + parameter_194, + parameter_195, + parameter_196, + parameter_197, + parameter_198, + parameter_199, + parameter_200, + parameter_201, + parameter_202, + parameter_203, + parameter_204, + parameter_205, + parameter_206, + parameter_207, + parameter_208, + parameter_209, + parameter_210, + parameter_211, + parameter_212, + parameter_213, + parameter_214, + parameter_215, + parameter_216, + parameter_217, + parameter_218, + parameter_219, + parameter_220, + parameter_221, + parameter_222, + parameter_223, + parameter_224, + parameter_225, + parameter_226, + parameter_227, + parameter_228, + parameter_229, + parameter_230, + parameter_231, + parameter_232, + parameter_233, + parameter_234, + parameter_235, + parameter_236, + parameter_237, + parameter_238, + parameter_239, + parameter_240, + parameter_241, + parameter_242, + parameter_243, + parameter_244, + parameter_245, + parameter_246, + parameter_247, + parameter_248, + parameter_249, + parameter_250, + parameter_251, + parameter_252, + parameter_253, + parameter_254, + parameter_255, + parameter_256, + parameter_257, + parameter_258, + parameter_259, + parameter_260, + parameter_261, + parameter_262, + parameter_263, + parameter_264, + parameter_265, + parameter_266, + parameter_267, + parameter_268, + parameter_269, + parameter_270, + parameter_271, + parameter_272, + parameter_273, + parameter_274, + parameter_275, + parameter_276, + parameter_277, + parameter_278, + parameter_279, + parameter_280, + parameter_281, + parameter_282, + parameter_283, + parameter_284, + parameter_285, + parameter_286, + parameter_287, + parameter_288, + parameter_289, + parameter_290, + parameter_291, + parameter_292, + parameter_293, + parameter_294, + parameter_295, + parameter_296, + parameter_297, + parameter_298, + parameter_299, + parameter_300, + parameter_301, + parameter_302, + parameter_303, + parameter_304, + data_0, + data_1, + data_2, + data_3, + data_4, + data_5, + data_6, + data_7, + data_8, + data_9, + data_10, + data_11, + data_12, + data_13, + data_14, + data_15, + data_16, + data_17, + data_18, + data_19, + data_20, + data_21, + data_22, + data_23, + data_24, + data_25, + data_26, + data_27, + data_28, + data_29, + data_30, + data_31, + data_32, + data_33, + data_34, + data_35, + data_36, + data_37, + data_38, + data_39, + data_40, + data_41, + data_42, + data_43, + data_44, + data_45, + data_46, + data_47, + data_48, + ): + # pd_op.conv2d: (64x96x56x56xf32) <- (64x3x224x224xf32, 96x3x4x4xf32) + conv2d_0 = paddle._C_ops.conv2d( + data_0, parameter_304, [4, 4], [0, 0], "EXPLICIT", [1, 1], 1, "NCHW" + ) + del data_0, parameter_304 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_9 = [1, -1, 1, 1] + + # pd_op.reshape: (1x96x1x1xf32) <- (96xf32, 4xi64) + reshape_0 = paddle._C_ops.reshape(parameter_303, full_int_array_9) + del full_int_array_9, parameter_303 + + # pd_op.add: (64x96x56x56xf32) <- (64x96x56x56xf32, 1x96x1x1xf32) + add_0 = paddle._C_ops.add(conv2d_0, reshape_0) + + # pd_op.flatten: (64x96x3136xf32) <- (64x96x56x56xf32) + flatten_1 = paddle._C_ops.flatten(add_0, 2, 3) + + # pd_op.transpose: (64x3136x96xf32) <- (64x96x3136xf32) + transpose_0 = paddle._C_ops.transpose(flatten_1, [0, 2, 1]) + del flatten_1 + + # pd_op.layer_norm: (64x3136x96xf32, 64x3136xf32, 64x3136xf32) <- (64x3136x96xf32, 96xf32, 96xf32) + layer_norm_0, layer_norm_1, layer_norm_2 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + transpose_0, parameter_302, parameter_301, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_301, parameter_302 + + # pd_op.layer_norm: (64x3136x96xf32, 64x3136xf32, 64x3136xf32) <- (64x3136x96xf32, 96xf32, 96xf32) + layer_norm_3, layer_norm_4, layer_norm_5 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + layer_norm_0, parameter_300, parameter_299, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_299, parameter_300 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_10 = [64, 56, 56, 96] + + # pd_op.reshape: (64x56x56x96xf32) <- (64x3136x96xf32, 4xi64) + reshape_1 = paddle._C_ops.reshape(layer_norm_3, full_int_array_10) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_11 = [64, 8, 7, 8, 7, 96] + + # pd_op.reshape: (64x8x7x8x7x96xf32) <- (64x56x56x96xf32, 6xi64) + reshape_211 = paddle._C_ops.reshape(reshape_1, full_int_array_11) + + # pd_op.transpose: (64x8x8x7x7x96xf32) <- (64x8x7x8x7x96xf32) + transpose_1 = paddle._C_ops.transpose(reshape_211, [0, 1, 3, 2, 4, 5]) + del reshape_211 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_12 = [-1, 7, 7, 96] + + # pd_op.reshape: (4096x7x7x96xf32) <- (64x8x8x7x7x96xf32, 4xi64) + reshape_2 = paddle._C_ops.reshape(transpose_1, full_int_array_12) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_13 = [-1, 49, 96] + + # pd_op.reshape: (4096x49x96xf32) <- (4096x7x7x96xf32, 3xi64) + reshape_3 = paddle._C_ops.reshape(reshape_2, full_int_array_13) + + # pd_op.matmul: (4096x49x288xf32) <- (4096x49x96xf32, 96x288xf32) + matmul_0 = paddle._C_ops.matmul(reshape_3, parameter_298, False, False) + del parameter_298 + + # pd_op.add: (4096x49x288xf32) <- (4096x49x288xf32, 288xf32) + add_1 = paddle._C_ops.add(matmul_0, parameter_297) + del parameter_297 + + # pd_op.full_int_array: (5xi64) <- () + full_int_array_14 = [4096, 49, 3, 3, 32] + + # pd_op.reshape: (4096x49x3x3x32xf32) <- (4096x49x288xf32, 5xi64) + reshape_212 = paddle._C_ops.reshape(add_1, full_int_array_14) + + # pd_op.transpose: (3x4096x3x49x32xf32) <- (4096x49x3x3x32xf32) + transpose_2 = paddle._C_ops.transpose(reshape_212, [2, 0, 3, 1, 4]) + del reshape_212 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_0 = [0] + + # pd_op.assign: (1xi64) <- (1xi64) + assign_264 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_257 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_254 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_247 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_231 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_224 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_221 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_214 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_211 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_204 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_201 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_194 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_191 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_184 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_181 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_174 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_171 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_164 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_161 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_154 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_151 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_144 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_141 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_134 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_131 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_124 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_121 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_114 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_111 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_104 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_101 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_94 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_91 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_84 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_81 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_74 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_71 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_64 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_61 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_54 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_38 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_31 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_28 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_21 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_10 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_3 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_2 = full_int_array_0 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_1 = [1] + + # pd_op.assign: (1xi64) <- (1xi64) + assign_259 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_258 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_249 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_248 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_226 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_225 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_216 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_215 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_206 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_205 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_196 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_195 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_186 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_185 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_176 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_175 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_166 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_165 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_156 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_155 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_146 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_145 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_136 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_135 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_126 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_125 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_116 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_115 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_106 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_105 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_96 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_95 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_86 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_85 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_76 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_75 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_66 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_65 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_56 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_55 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_33 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_32 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_23 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_22 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_5 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_4 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_0 = full_int_array_1 + + # pd_op.slice: (4096x3x49x32xf32) <- (3x4096x3x49x32xf32, 1xi64, 1xi64) + slice_24 = paddle._C_ops.slice( + transpose_2, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_2 = [2] + + # pd_op.assign: (1xi64) <- (1xi64) + assign_269 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_267 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_261 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_260 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_251 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_250 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_228 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_227 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_218 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_217 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_208 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_207 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_198 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_197 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_188 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_187 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_178 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_177 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_168 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_167 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_158 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_157 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_148 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_147 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_138 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_137 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_128 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_127 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_118 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_117 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_108 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_107 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_98 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_97 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_88 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_87 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_78 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_77 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_68 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_67 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_58 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_57 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_35 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_34 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_25 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_24 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_7 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_6 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_1 = full_int_array_2 + + # pd_op.slice: (4096x3x49x32xf32) <- (3x4096x3x49x32xf32, 1xi64, 1xi64) + slice_25 = paddle._C_ops.slice( + transpose_2, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_3 = [3] + + # pd_op.assign: (1xi64) <- (1xi64) + assign_262 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_252 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_229 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_219 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_209 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_199 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_189 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_179 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_169 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_159 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_149 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_139 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_129 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_119 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_109 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_99 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_89 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_79 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_69 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_59 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_36 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_26 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_8 = full_int_array_3 + + # pd_op.slice: (4096x3x49x32xf32) <- (3x4096x3x49x32xf32, 1xi64, 1xi64) + slice_0 = paddle._C_ops.slice( + transpose_2, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.full: (1xf32) <- () + full_0 = paddle._C_ops.full( + [1], float("0.176777"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.assign: (1xf32) <- (1xf32) + assign_263 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_253 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_230 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_220 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_210 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_200 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_190 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_180 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_170 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_160 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_150 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_140 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_130 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_120 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_110 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_100 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_90 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_80 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_70 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_60 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_37 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_27 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_9 = full_0 + + # pd_op.scale: (4096x3x49x32xf32) <- (4096x3x49x32xf32, 1xf32) + scale_0 = paddle._C_ops.scale(slice_24, full_0, float("0"), True) + del slice_24 + + # pd_op.transpose: (4096x3x32x49xf32) <- (4096x3x49x32xf32) + transpose_3 = paddle._C_ops.transpose(slice_25, [0, 1, 3, 2]) + del slice_25 + + # pd_op.matmul: (4096x3x49x49xf32) <- (4096x3x49x32xf32, 4096x3x32x49xf32) + matmul_1 = paddle._C_ops.matmul(scale_0, transpose_3, False, False) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_15 = [-1] + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_4 = paddle._C_ops.reshape(data_23, full_int_array_15) + del data_23 + + # pd_op.index_select: (2401x3xf32) <- (169x3xf32, 2401xi64) + index_select_0 = paddle._C_ops.index_select(data_24, reshape_4, 0) + del data_24 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_16 = [49, 49, -1] + + # pd_op.reshape: (49x49x3xf32) <- (2401x3xf32, 3xi64) + reshape_213 = paddle._C_ops.reshape(index_select_0, full_int_array_16) + + # pd_op.transpose: (3x49x49xf32) <- (49x49x3xf32) + transpose_4 = paddle._C_ops.transpose(reshape_213, [2, 0, 1]) + del reshape_213 + + # pd_op.unsqueeze: (1x3x49x49xf32) <- (3x49x49xf32, 1xi64) + unsqueeze_0 = paddle._C_ops.unsqueeze(transpose_4, full_int_array_0) + + # pd_op.add: (4096x3x49x49xf32) <- (4096x3x49x49xf32, 1x3x49x49xf32) + add_170 = paddle._C_ops.add(matmul_1, unsqueeze_0) + + # pd_op.softmax: (4096x3x49x49xf32) <- (4096x3x49x49xf32) + softmax_0 = paddle._C_ops.softmax(add_170, -1) + del add_170 + + # pd_op.matmul: (4096x3x49x32xf32) <- (4096x3x49x49xf32, 4096x3x49x32xf32) + matmul_124 = paddle._C_ops.matmul(softmax_0, slice_0, False, False) + + # pd_op.transpose: (4096x49x3x32xf32) <- (4096x3x49x32xf32) + transpose_5 = paddle._C_ops.transpose(matmul_124, [0, 2, 1, 3]) + del matmul_124 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_17 = [4096, 49, 96] + + # pd_op.reshape: (4096x49x96xf32) <- (4096x49x3x32xf32, 3xi64) + reshape_5 = paddle._C_ops.reshape(transpose_5, full_int_array_17) + + # pd_op.matmul: (4096x49x96xf32) <- (4096x49x96xf32, 96x96xf32) + matmul_2 = paddle._C_ops.matmul(reshape_5, parameter_296, False, False) + del parameter_296 + + # pd_op.add: (4096x49x96xf32) <- (4096x49x96xf32, 96xf32) + add_2 = paddle._C_ops.add(matmul_2, parameter_295) + del parameter_295 + + # pd_op.reshape: (4096x7x7x96xf32) <- (4096x49x96xf32, 4xi64) + reshape_6 = paddle._C_ops.reshape(add_2, full_int_array_12) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_18 = [-1, 8, 8, 7, 7, 96] + + # pd_op.reshape: (64x8x8x7x7x96xf32) <- (4096x7x7x96xf32, 6xi64) + reshape_214 = paddle._C_ops.reshape(reshape_6, full_int_array_18) + + # pd_op.transpose: (64x8x7x8x7x96xf32) <- (64x8x8x7x7x96xf32) + transpose_6 = paddle._C_ops.transpose(reshape_214, [0, 1, 3, 2, 4, 5]) + del reshape_214 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_19 = [-1, 56, 56, 96] + + # pd_op.reshape: (64x56x56x96xf32) <- (64x8x7x8x7x96xf32, 4xi64) + reshape_7 = paddle._C_ops.reshape(transpose_6, full_int_array_19) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_20 = [64, 3136, 96] + + # pd_op.reshape: (64x3136x96xf32) <- (64x56x56x96xf32, 3xi64) + reshape_8 = paddle._C_ops.reshape(reshape_7, full_int_array_20) + + # pd_op.add: (64x3136x96xf32) <- (64x3136x96xf32, 64x3136x96xf32) + add_3 = paddle._C_ops.add(layer_norm_0, reshape_8) + + # pd_op.layer_norm: (64x3136x96xf32, 64x3136xf32, 64x3136xf32) <- (64x3136x96xf32, 96xf32, 96xf32) + layer_norm_6, layer_norm_7, layer_norm_8 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_3, parameter_294, parameter_293, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_293, parameter_294 + + # pd_op.matmul: (64x3136x384xf32) <- (64x3136x96xf32, 96x384xf32) + matmul_3 = paddle._C_ops.matmul(layer_norm_6, parameter_292, False, False) + del parameter_292 + + # pd_op.add: (64x3136x384xf32) <- (64x3136x384xf32, 384xf32) + add_4 = paddle._C_ops.add(matmul_3, parameter_291) + del parameter_291 + + # pd_op.gelu: (64x3136x384xf32) <- (64x3136x384xf32) + gelu_0 = paddle._C_ops.gelu(add_4, False) + + # pd_op.matmul: (64x3136x96xf32) <- (64x3136x384xf32, 384x96xf32) + matmul_4 = paddle._C_ops.matmul(gelu_0, parameter_290, False, False) + del parameter_290 + + # pd_op.add: (64x3136x96xf32) <- (64x3136x96xf32, 96xf32) + add_5 = paddle._C_ops.add(matmul_4, parameter_289) + del parameter_289 + + # pd_op.add: (64x3136x96xf32) <- (64x3136x96xf32, 64x3136x96xf32) + add_6 = paddle._C_ops.add(add_3, add_5) + + # pd_op.layer_norm: (64x3136x96xf32, 64x3136xf32, 64x3136xf32) <- (64x3136x96xf32, 96xf32, 96xf32) + layer_norm_9, layer_norm_10, layer_norm_11 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_6, parameter_288, parameter_287, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_287, parameter_288 + + # pd_op.reshape: (64x56x56x96xf32) <- (64x3136x96xf32, 4xi64) + reshape_9 = paddle._C_ops.reshape(layer_norm_9, full_int_array_10) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_4 = [-3, -3] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_256 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_223 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_203 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_183 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_163 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_143 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_123 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_103 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_83 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_63 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_30 = full_int_array_4 + + # pd_op.roll: (64x56x56x96xf32) <- (64x56x56x96xf32, 2xi64) + roll_0 = paddle._C_ops.roll(reshape_9, full_int_array_4, [1, 2]) + + # pd_op.reshape: (64x8x7x8x7x96xf32) <- (64x56x56x96xf32, 6xi64) + reshape_215 = paddle._C_ops.reshape(roll_0, full_int_array_11) + del full_int_array_11 + + # pd_op.transpose: (64x8x8x7x7x96xf32) <- (64x8x7x8x7x96xf32) + transpose_7 = paddle._C_ops.transpose(reshape_215, [0, 1, 3, 2, 4, 5]) + del reshape_215 + + # pd_op.reshape: (4096x7x7x96xf32) <- (64x8x8x7x7x96xf32, 4xi64) + reshape_10 = paddle._C_ops.reshape(transpose_7, full_int_array_12) + + # pd_op.reshape: (4096x49x96xf32) <- (4096x7x7x96xf32, 3xi64) + reshape_11 = paddle._C_ops.reshape(reshape_10, full_int_array_13) + del full_int_array_13 + + # pd_op.full: (1x56x56x1xf32) <- () + full_25 = paddle._C_ops.full( + [1, 56, 56, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_21 = [0, 0] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_234 = full_int_array_21 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_41 = full_int_array_21 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_12 = full_int_array_21 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_22 = [-7, -7] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_23 = [1, 1] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_268 = full_int_array_23 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_243 = full_int_array_23 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_50 = full_int_array_23 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_18 = full_int_array_23 + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__12 = paddle._C_ops.set_value_( + full_25, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_25 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_24 = [0, -7] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_25 = [-7, -3] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__13 = paddle._C_ops.set_value_( + set_value__12, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__12 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_26 = [0, -3] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_27 = [-7, 2147483647] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__14 = paddle._C_ops.set_value_( + set_value__13, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__13 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_28 = [-7, 0] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_29 = [-3, -7] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__15 = paddle._C_ops.set_value_( + set_value__14, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__14 + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__16 = paddle._C_ops.set_value_( + set_value__15, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__15 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_30 = [-3, 2147483647] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__17 = paddle._C_ops.set_value_( + set_value__16, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__16 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_31 = [-3, 0] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_32 = [2147483647, -7] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__18 = paddle._C_ops.set_value_( + set_value__17, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__17 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_33 = [2147483647, -3] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__19 = paddle._C_ops.set_value_( + set_value__18, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__18 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_34 = [2147483647, 2147483647] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_244 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_241 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_238 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_235 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_51 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_48 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_45 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_42 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_19 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_16 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_14 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_13 = full_int_array_34 + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__0 = paddle._C_ops.set_value_( + set_value__19, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__19 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_35 = [1, 8, 7, 8, 7, 1] + + # pd_op.reshape: (1x8x7x8x7x1xf32) <- (1x56x56x1xf32, 6xi64) + reshape_216 = paddle._C_ops.reshape(set_value__0, full_int_array_35) + del full_int_array_35 + + # pd_op.transpose: (1x8x8x7x7x1xf32) <- (1x8x7x8x7x1xf32) + transpose_146 = paddle._C_ops.transpose(reshape_216, [0, 1, 3, 2, 4, 5]) + del reshape_216 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_36 = [-1, 7, 7, 1] + + # pd_op.reshape: (64x7x7x1xf32) <- (1x8x8x7x7x1xf32, 4xi64) + reshape_217 = paddle._C_ops.reshape(transpose_146, full_int_array_36) + del transpose_146 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_37 = [-1, 49] + + # pd_op.reshape: (64x49xf32) <- (64x7x7x1xf32, 2xi64) + reshape_218 = paddle._C_ops.reshape(reshape_217, full_int_array_37) + del reshape_217 + + # pd_op.unsqueeze: (64x1x49xf32) <- (64x49xf32, 1xi64) + unsqueeze_37 = paddle._C_ops.unsqueeze(reshape_218, full_int_array_1) + + # pd_op.unsqueeze: (64x49x1xf32) <- (64x49xf32, 1xi64) + unsqueeze_38 = paddle._C_ops.unsqueeze(reshape_218, full_int_array_2) + del reshape_218 + + # pd_op.subtract: (64x49x49xf32) <- (64x1x49xf32, 64x49x1xf32) + subtract_0 = paddle._C_ops.subtract(unsqueeze_37, unsqueeze_38) + del unsqueeze_37, unsqueeze_38 + + # pd_op.full: (xf32) <- () + full_26 = paddle._C_ops.full( + [], float("0"), paddle.float32, paddle.framework._current_expected_place() + ) + + # pd_op.not_equal: (64x49x49xb) <- (64x49x49xf32, xf32) + not_equal_0 = paddle._C_ops.not_equal(subtract_0, full_26) + + # pd_op.full: (64x49x49xf32) <- () + full_27 = paddle._C_ops.full( + [64, 49, 49], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (64x49x49xf32) <- (64x49x49xb, 64x49x49xf32, 64x49x49xf32) + where_0 = paddle._C_ops.where(not_equal_0, full_27, subtract_0) + del full_27, not_equal_0, subtract_0 + + # pd_op.equal: (64x49x49xb) <- (64x49x49xf32, xf32) + equal_0 = paddle._C_ops.equal(where_0, full_26) + + # pd_op.full: (64x49x49xf32) <- () + full_28 = paddle._C_ops.full( + [64, 49, 49], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (64x49x49xf32) <- (64x49x49xb, 64x49x49xf32, 64x49x49xf32) + where_1 = paddle._C_ops.where(equal_0, full_28, where_0) + del equal_0, full_28, where_0 + + # pd_op.matmul: (4096x49x288xf32) <- (4096x49x96xf32, 96x288xf32) + matmul_5 = paddle._C_ops.matmul(reshape_11, parameter_286, False, False) + del parameter_286 + + # pd_op.add: (4096x49x288xf32) <- (4096x49x288xf32, 288xf32) + add_7 = paddle._C_ops.add(matmul_5, parameter_285) + del parameter_285 + + # pd_op.reshape: (4096x49x3x3x32xf32) <- (4096x49x288xf32, 5xi64) + reshape_219 = paddle._C_ops.reshape(add_7, full_int_array_14) + del full_int_array_14 + + # pd_op.transpose: (3x4096x3x49x32xf32) <- (4096x49x3x3x32xf32) + transpose_8 = paddle._C_ops.transpose(reshape_219, [2, 0, 3, 1, 4]) + del reshape_219 + + # pd_op.slice: (4096x3x49x32xf32) <- (3x4096x3x49x32xf32, 1xi64, 1xi64) + slice_26 = paddle._C_ops.slice( + transpose_8, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (4096x3x49x32xf32) <- (3x4096x3x49x32xf32, 1xi64, 1xi64) + slice_27 = paddle._C_ops.slice( + transpose_8, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (4096x3x49x32xf32) <- (3x4096x3x49x32xf32, 1xi64, 1xi64) + slice_1 = paddle._C_ops.slice( + transpose_8, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (4096x3x49x32xf32) <- (4096x3x49x32xf32, 1xf32) + scale_1 = paddle._C_ops.scale(slice_26, full_0, float("0"), True) + del slice_26 + + # pd_op.transpose: (4096x3x32x49xf32) <- (4096x3x49x32xf32) + transpose_9 = paddle._C_ops.transpose(slice_27, [0, 1, 3, 2]) + del slice_27 + + # pd_op.matmul: (4096x3x49x49xf32) <- (4096x3x49x32xf32, 4096x3x32x49xf32) + matmul_6 = paddle._C_ops.matmul(scale_1, transpose_9, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_12 = paddle._C_ops.reshape(data_43, full_int_array_15) + del data_43 + + # pd_op.index_select: (2401x3xf32) <- (169x3xf32, 2401xi64) + index_select_1 = paddle._C_ops.index_select(data_46, reshape_12, 0) + del data_46 + + # pd_op.reshape: (49x49x3xf32) <- (2401x3xf32, 3xi64) + reshape_220 = paddle._C_ops.reshape(index_select_1, full_int_array_16) + + # pd_op.transpose: (3x49x49xf32) <- (49x49x3xf32) + transpose_10 = paddle._C_ops.transpose(reshape_220, [2, 0, 1]) + del reshape_220 + + # pd_op.unsqueeze: (1x3x49x49xf32) <- (3x49x49xf32, 1xi64) + unsqueeze_1 = paddle._C_ops.unsqueeze(transpose_10, full_int_array_0) + + # pd_op.add: (4096x3x49x49xf32) <- (4096x3x49x49xf32, 1x3x49x49xf32) + add_8 = paddle._C_ops.add(matmul_6, unsqueeze_1) + + # pd_op.full_int_array: (5xi64) <- () + full_int_array_38 = [64, 64, 3, 49, 49] + + # pd_op.reshape: (64x64x3x49x49xf32) <- (4096x3x49x49xf32, 5xi64) + reshape_13 = paddle._C_ops.reshape(add_8, full_int_array_38) + del full_int_array_38 + + # pd_op.unsqueeze: (64x1x49x49xf32) <- (64x49x49xf32, 1xi64) + unsqueeze_39 = paddle._C_ops.unsqueeze(where_1, full_int_array_1) + del where_1 + + # pd_op.unsqueeze: (1x64x1x49x49xf32) <- (64x1x49x49xf32, 1xi64) + unsqueeze_2 = paddle._C_ops.unsqueeze(unsqueeze_39, full_int_array_0) + del unsqueeze_39 + + # pd_op.add: (64x64x3x49x49xf32) <- (64x64x3x49x49xf32, 1x64x1x49x49xf32) + add_9 = paddle._C_ops.add(reshape_13, unsqueeze_2) + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_39 = [4096, 3, 49, 49] + + # pd_op.reshape: (4096x3x49x49xf32) <- (64x64x3x49x49xf32, 4xi64) + reshape_221 = paddle._C_ops.reshape(add_9, full_int_array_39) + del full_int_array_39 + + # pd_op.softmax: (4096x3x49x49xf32) <- (4096x3x49x49xf32) + softmax_1 = paddle._C_ops.softmax(reshape_221, -1) + del reshape_221 + + # pd_op.matmul: (4096x3x49x32xf32) <- (4096x3x49x49xf32, 4096x3x49x32xf32) + matmul_125 = paddle._C_ops.matmul(softmax_1, slice_1, False, False) + + # pd_op.transpose: (4096x49x3x32xf32) <- (4096x3x49x32xf32) + transpose_11 = paddle._C_ops.transpose(matmul_125, [0, 2, 1, 3]) + del matmul_125 + + # pd_op.reshape: (4096x49x96xf32) <- (4096x49x3x32xf32, 3xi64) + reshape_14 = paddle._C_ops.reshape(transpose_11, full_int_array_17) + del full_int_array_17 + + # pd_op.matmul: (4096x49x96xf32) <- (4096x49x96xf32, 96x96xf32) + matmul_7 = paddle._C_ops.matmul(reshape_14, parameter_284, False, False) + del parameter_284 + + # pd_op.add: (4096x49x96xf32) <- (4096x49x96xf32, 96xf32) + add_10 = paddle._C_ops.add(matmul_7, parameter_283) + del parameter_283 + + # pd_op.reshape: (4096x7x7x96xf32) <- (4096x49x96xf32, 4xi64) + reshape_15 = paddle._C_ops.reshape(add_10, full_int_array_12) + del full_int_array_12 + + # pd_op.reshape: (64x8x8x7x7x96xf32) <- (4096x7x7x96xf32, 6xi64) + reshape_222 = paddle._C_ops.reshape(reshape_15, full_int_array_18) + del full_int_array_18 + + # pd_op.transpose: (64x8x7x8x7x96xf32) <- (64x8x8x7x7x96xf32) + transpose_12 = paddle._C_ops.transpose(reshape_222, [0, 1, 3, 2, 4, 5]) + del reshape_222 + + # pd_op.reshape: (64x56x56x96xf32) <- (64x8x7x8x7x96xf32, 4xi64) + reshape_16 = paddle._C_ops.reshape(transpose_12, full_int_array_19) + del full_int_array_19 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_5 = [3, 3] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_265 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_232 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_212 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_192 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_172 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_152 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_132 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_112 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_92 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_72 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_39 = full_int_array_5 + + # pd_op.roll: (64x56x56x96xf32) <- (64x56x56x96xf32, 2xi64) + roll_1 = paddle._C_ops.roll(reshape_16, full_int_array_5, [1, 2]) + + # pd_op.reshape: (64x3136x96xf32) <- (64x56x56x96xf32, 3xi64) + reshape_17 = paddle._C_ops.reshape(roll_1, full_int_array_20) + del full_int_array_20 + + # pd_op.full: (xf32) <- () + full_1 = paddle._C_ops.full( + [], + float("0.986957"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_11 = full_1 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_40 = [64, 1, 1] + + # pd_op.full: (1xf32) <- () + full_29 = paddle._C_ops.full( + [1], float("0"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.full: (1xf32) <- () + full_30 = paddle._C_ops.full( + [1], float("1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_0 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_171 = paddle._C_ops.add(full_1, uniform_0) + del uniform_0 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_0 = paddle._C_ops.floor(add_171) + del add_171 + + # pd_op.divide: (64x3136x96xf32) <- (64x3136x96xf32, xf32) + divide_0 = paddle._C_ops.divide(reshape_17, full_1) + + # pd_op.multiply: (64x3136x96xf32) <- (64x3136x96xf32, 64x1x1xf32) + multiply_0 = paddle._C_ops.multiply(divide_0, floor_0) + + # pd_op.add: (64x3136x96xf32) <- (64x3136x96xf32, 64x3136x96xf32) + add_11 = paddle._C_ops.add(add_6, multiply_0) + + # pd_op.layer_norm: (64x3136x96xf32, 64x3136xf32, 64x3136xf32) <- (64x3136x96xf32, 96xf32, 96xf32) + layer_norm_12, layer_norm_13, layer_norm_14 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_11, parameter_282, parameter_281, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_281, parameter_282 + + # pd_op.matmul: (64x3136x384xf32) <- (64x3136x96xf32, 96x384xf32) + matmul_8 = paddle._C_ops.matmul(layer_norm_12, parameter_280, False, False) + del parameter_280 + + # pd_op.add: (64x3136x384xf32) <- (64x3136x384xf32, 384xf32) + add_12 = paddle._C_ops.add(matmul_8, parameter_279) + del parameter_279 + + # pd_op.gelu: (64x3136x384xf32) <- (64x3136x384xf32) + gelu_1 = paddle._C_ops.gelu(add_12, False) + + # pd_op.matmul: (64x3136x96xf32) <- (64x3136x384xf32, 384x96xf32) + matmul_9 = paddle._C_ops.matmul(gelu_1, parameter_278, False, False) + del parameter_278 + + # pd_op.add: (64x3136x96xf32) <- (64x3136x96xf32, 96xf32) + add_13 = paddle._C_ops.add(matmul_9, parameter_277) + del parameter_277 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_1 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_172 = paddle._C_ops.add(full_1, uniform_1) + del uniform_1 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_1 = paddle._C_ops.floor(add_172) + del add_172 + + # pd_op.divide: (64x3136x96xf32) <- (64x3136x96xf32, xf32) + divide_1 = paddle._C_ops.divide(add_13, full_1) + + # pd_op.multiply: (64x3136x96xf32) <- (64x3136x96xf32, 64x1x1xf32) + multiply_1 = paddle._C_ops.multiply(divide_1, floor_1) + + # pd_op.add: (64x3136x96xf32) <- (64x3136x96xf32, 64x3136x96xf32) + add_14 = paddle._C_ops.add(add_11, multiply_1) + + # pd_op.reshape: (64x56x56x96xf32) <- (64x3136x96xf32, 4xi64) + reshape_18 = paddle._C_ops.reshape(add_14, full_int_array_10) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_6 = [2, 2] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_245 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_242 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_239 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_236 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_52 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_49 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_46 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_43 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_20 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_17 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_15 = full_int_array_6 + + # pd_op.strided_slice: (64x28x28x96xf32) <- (64x56x56x96xf32, 2xi64, 2xi64, 2xi64) + strided_slice_0 = paddle._C_ops.strided_slice( + reshape_18, [1, 2], full_int_array_21, full_int_array_34, full_int_array_6 + ) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_7 = [1, 0] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_237 = full_int_array_7 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_44 = full_int_array_7 + + # pd_op.strided_slice: (64x28x28x96xf32) <- (64x56x56x96xf32, 2xi64, 2xi64, 2xi64) + strided_slice_1 = paddle._C_ops.strided_slice( + reshape_18, [1, 2], full_int_array_7, full_int_array_34, full_int_array_6 + ) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_8 = [0, 1] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_240 = full_int_array_8 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_47 = full_int_array_8 + + # pd_op.strided_slice: (64x28x28x96xf32) <- (64x56x56x96xf32, 2xi64, 2xi64, 2xi64) + strided_slice_2 = paddle._C_ops.strided_slice( + reshape_18, [1, 2], full_int_array_8, full_int_array_34, full_int_array_6 + ) + + # pd_op.strided_slice: (64x28x28x96xf32) <- (64x56x56x96xf32, 2xi64, 2xi64, 2xi64) + strided_slice_3 = paddle._C_ops.strided_slice( + reshape_18, [1, 2], full_int_array_23, full_int_array_34, full_int_array_6 + ) + + # pd_op.reshape: (64x56x56x96xf32) <- (64x56x56x96xf32, 4xi64) + reshape_223 = paddle._C_ops.reshape(reshape_18, full_int_array_10) + del full_int_array_10 + + # pd_op.full: (1xi32) <- () + full_2 = paddle._C_ops.full( + [1], float("-1"), paddle.int32, paddle.core.CPUPlace() + ) + + # pd_op.assign: (1xi32) <- (1xi32) + assign_246 = full_2 + + # pd_op.assign: (1xi32) <- (1xi32) + assign_53 = full_2 + + # builtin.combine: ([64x28x28x96xf32, 64x28x28x96xf32, 64x28x28x96xf32, 64x28x28x96xf32]) <- (64x28x28x96xf32, 64x28x28x96xf32, 64x28x28x96xf32, 64x28x28x96xf32) + combine_0 = [strided_slice_0, strided_slice_1, strided_slice_2, strided_slice_3] + + # pd_op.concat: (64x28x28x384xf32) <- ([64x28x28x96xf32, 64x28x28x96xf32, 64x28x28x96xf32, 64x28x28x96xf32], 1xi32) + concat_0 = paddle._C_ops.concat(combine_0, full_2) + del combine_0 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_41 = [64, -1, 384] + + # pd_op.reshape: (64x784x384xf32) <- (64x28x28x384xf32, 3xi64) + reshape_19 = paddle._C_ops.reshape(concat_0, full_int_array_41) + del full_int_array_41 + + # pd_op.layer_norm: (64x784x384xf32, 64x784xf32, 64x784xf32) <- (64x784x384xf32, 384xf32, 384xf32) + layer_norm_15, layer_norm_16, layer_norm_17 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + reshape_19, parameter_276, parameter_275, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_275, parameter_276 + + # pd_op.matmul: (64x784x192xf32) <- (64x784x384xf32, 384x192xf32) + matmul_10 = paddle._C_ops.matmul(layer_norm_15, parameter_274, False, False) + del parameter_274 + + # pd_op.layer_norm: (64x784x192xf32, 64x784xf32, 64x784xf32) <- (64x784x192xf32, 192xf32, 192xf32) + layer_norm_18, layer_norm_19, layer_norm_20 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + matmul_10, parameter_273, parameter_272, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_272, parameter_273 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_42 = [64, 28, 28, 192] + + # pd_op.reshape: (64x28x28x192xf32) <- (64x784x192xf32, 4xi64) + reshape_20 = paddle._C_ops.reshape(layer_norm_18, full_int_array_42) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_43 = [64, 4, 7, 4, 7, 192] + + # pd_op.reshape: (64x4x7x4x7x192xf32) <- (64x28x28x192xf32, 6xi64) + reshape_224 = paddle._C_ops.reshape(reshape_20, full_int_array_43) + + # pd_op.transpose: (64x4x4x7x7x192xf32) <- (64x4x7x4x7x192xf32) + transpose_13 = paddle._C_ops.transpose(reshape_224, [0, 1, 3, 2, 4, 5]) + del reshape_224 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_44 = [-1, 7, 7, 192] + + # pd_op.reshape: (1024x7x7x192xf32) <- (64x4x4x7x7x192xf32, 4xi64) + reshape_21 = paddle._C_ops.reshape(transpose_13, full_int_array_44) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_45 = [-1, 49, 192] + + # pd_op.reshape: (1024x49x192xf32) <- (1024x7x7x192xf32, 3xi64) + reshape_22 = paddle._C_ops.reshape(reshape_21, full_int_array_45) + + # pd_op.matmul: (1024x49x576xf32) <- (1024x49x192xf32, 192x576xf32) + matmul_11 = paddle._C_ops.matmul(reshape_22, parameter_271, False, False) + del parameter_271 + + # pd_op.add: (1024x49x576xf32) <- (1024x49x576xf32, 576xf32) + add_15 = paddle._C_ops.add(matmul_11, parameter_270) + del parameter_270 + + # pd_op.full_int_array: (5xi64) <- () + full_int_array_46 = [1024, 49, 3, 6, 32] + + # pd_op.reshape: (1024x49x3x6x32xf32) <- (1024x49x576xf32, 5xi64) + reshape_225 = paddle._C_ops.reshape(add_15, full_int_array_46) + + # pd_op.transpose: (3x1024x6x49x32xf32) <- (1024x49x3x6x32xf32) + transpose_14 = paddle._C_ops.transpose(reshape_225, [2, 0, 3, 1, 4]) + del reshape_225 + + # pd_op.slice: (1024x6x49x32xf32) <- (3x1024x6x49x32xf32, 1xi64, 1xi64) + slice_28 = paddle._C_ops.slice( + transpose_14, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (1024x6x49x32xf32) <- (3x1024x6x49x32xf32, 1xi64, 1xi64) + slice_29 = paddle._C_ops.slice( + transpose_14, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (1024x6x49x32xf32) <- (3x1024x6x49x32xf32, 1xi64, 1xi64) + slice_2 = paddle._C_ops.slice( + transpose_14, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (1024x6x49x32xf32) <- (1024x6x49x32xf32, 1xf32) + scale_2 = paddle._C_ops.scale(slice_28, full_0, float("0"), True) + del slice_28 + + # pd_op.transpose: (1024x6x32x49xf32) <- (1024x6x49x32xf32) + transpose_15 = paddle._C_ops.transpose(slice_29, [0, 1, 3, 2]) + del slice_29 + + # pd_op.matmul: (1024x6x49x49xf32) <- (1024x6x49x32xf32, 1024x6x32x49xf32) + matmul_12 = paddle._C_ops.matmul(scale_2, transpose_15, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_23 = paddle._C_ops.reshape(data_17, full_int_array_15) + del data_17 + + # pd_op.index_select: (2401x6xf32) <- (169x6xf32, 2401xi64) + index_select_2 = paddle._C_ops.index_select(data_20, reshape_23, 0) + del data_20 + + # pd_op.reshape: (49x49x6xf32) <- (2401x6xf32, 3xi64) + reshape_226 = paddle._C_ops.reshape(index_select_2, full_int_array_16) + + # pd_op.transpose: (6x49x49xf32) <- (49x49x6xf32) + transpose_16 = paddle._C_ops.transpose(reshape_226, [2, 0, 1]) + del reshape_226 + + # pd_op.unsqueeze: (1x6x49x49xf32) <- (6x49x49xf32, 1xi64) + unsqueeze_3 = paddle._C_ops.unsqueeze(transpose_16, full_int_array_0) + + # pd_op.add: (1024x6x49x49xf32) <- (1024x6x49x49xf32, 1x6x49x49xf32) + add_173 = paddle._C_ops.add(matmul_12, unsqueeze_3) + + # pd_op.softmax: (1024x6x49x49xf32) <- (1024x6x49x49xf32) + softmax_2 = paddle._C_ops.softmax(add_173, -1) + del add_173 + + # pd_op.matmul: (1024x6x49x32xf32) <- (1024x6x49x49xf32, 1024x6x49x32xf32) + matmul_126 = paddle._C_ops.matmul(softmax_2, slice_2, False, False) + + # pd_op.transpose: (1024x49x6x32xf32) <- (1024x6x49x32xf32) + transpose_17 = paddle._C_ops.transpose(matmul_126, [0, 2, 1, 3]) + del matmul_126 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_47 = [1024, 49, 192] + + # pd_op.reshape: (1024x49x192xf32) <- (1024x49x6x32xf32, 3xi64) + reshape_24 = paddle._C_ops.reshape(transpose_17, full_int_array_47) + + # pd_op.matmul: (1024x49x192xf32) <- (1024x49x192xf32, 192x192xf32) + matmul_13 = paddle._C_ops.matmul(reshape_24, parameter_269, False, False) + del parameter_269 + + # pd_op.add: (1024x49x192xf32) <- (1024x49x192xf32, 192xf32) + add_16 = paddle._C_ops.add(matmul_13, parameter_268) + del parameter_268 + + # pd_op.reshape: (1024x7x7x192xf32) <- (1024x49x192xf32, 4xi64) + reshape_25 = paddle._C_ops.reshape(add_16, full_int_array_44) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_48 = [-1, 4, 4, 7, 7, 192] + + # pd_op.reshape: (64x4x4x7x7x192xf32) <- (1024x7x7x192xf32, 6xi64) + reshape_227 = paddle._C_ops.reshape(reshape_25, full_int_array_48) + + # pd_op.transpose: (64x4x7x4x7x192xf32) <- (64x4x4x7x7x192xf32) + transpose_18 = paddle._C_ops.transpose(reshape_227, [0, 1, 3, 2, 4, 5]) + del reshape_227 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_49 = [-1, 28, 28, 192] + + # pd_op.reshape: (64x28x28x192xf32) <- (64x4x7x4x7x192xf32, 4xi64) + reshape_26 = paddle._C_ops.reshape(transpose_18, full_int_array_49) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_50 = [64, 784, 192] + + # pd_op.reshape: (64x784x192xf32) <- (64x28x28x192xf32, 3xi64) + reshape_27 = paddle._C_ops.reshape(reshape_26, full_int_array_50) + + # pd_op.full: (xf32) <- () + full_3 = paddle._C_ops.full( + [], + float("0.973913"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_29 = full_3 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_2 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_174 = paddle._C_ops.add(full_3, uniform_2) + del uniform_2 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_2 = paddle._C_ops.floor(add_174) + del add_174 + + # pd_op.divide: (64x784x192xf32) <- (64x784x192xf32, xf32) + divide_2 = paddle._C_ops.divide(reshape_27, full_3) + + # pd_op.multiply: (64x784x192xf32) <- (64x784x192xf32, 64x1x1xf32) + multiply_2 = paddle._C_ops.multiply(divide_2, floor_2) + + # pd_op.add: (64x784x192xf32) <- (64x784x192xf32, 64x784x192xf32) + add_17 = paddle._C_ops.add(matmul_10, multiply_2) + + # pd_op.layer_norm: (64x784x192xf32, 64x784xf32, 64x784xf32) <- (64x784x192xf32, 192xf32, 192xf32) + layer_norm_21, layer_norm_22, layer_norm_23 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_17, parameter_267, parameter_266, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_266, parameter_267 + + # pd_op.matmul: (64x784x768xf32) <- (64x784x192xf32, 192x768xf32) + matmul_14 = paddle._C_ops.matmul(layer_norm_21, parameter_265, False, False) + del parameter_265 + + # pd_op.add: (64x784x768xf32) <- (64x784x768xf32, 768xf32) + add_18 = paddle._C_ops.add(matmul_14, parameter_264) + del parameter_264 + + # pd_op.gelu: (64x784x768xf32) <- (64x784x768xf32) + gelu_2 = paddle._C_ops.gelu(add_18, False) + + # pd_op.matmul: (64x784x192xf32) <- (64x784x768xf32, 768x192xf32) + matmul_15 = paddle._C_ops.matmul(gelu_2, parameter_263, False, False) + del parameter_263 + + # pd_op.add: (64x784x192xf32) <- (64x784x192xf32, 192xf32) + add_19 = paddle._C_ops.add(matmul_15, parameter_262) + del parameter_262 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_3 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_175 = paddle._C_ops.add(full_3, uniform_3) + del uniform_3 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_3 = paddle._C_ops.floor(add_175) + del add_175 + + # pd_op.divide: (64x784x192xf32) <- (64x784x192xf32, xf32) + divide_3 = paddle._C_ops.divide(add_19, full_3) + + # pd_op.multiply: (64x784x192xf32) <- (64x784x192xf32, 64x1x1xf32) + multiply_3 = paddle._C_ops.multiply(divide_3, floor_3) + + # pd_op.add: (64x784x192xf32) <- (64x784x192xf32, 64x784x192xf32) + add_20 = paddle._C_ops.add(add_17, multiply_3) + + # pd_op.layer_norm: (64x784x192xf32, 64x784xf32, 64x784xf32) <- (64x784x192xf32, 192xf32, 192xf32) + layer_norm_24, layer_norm_25, layer_norm_26 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_20, parameter_261, parameter_260, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_260, parameter_261 + + # pd_op.reshape: (64x28x28x192xf32) <- (64x784x192xf32, 4xi64) + reshape_28 = paddle._C_ops.reshape(layer_norm_24, full_int_array_42) + + # pd_op.roll: (64x28x28x192xf32) <- (64x28x28x192xf32, 2xi64) + roll_2 = paddle._C_ops.roll(reshape_28, full_int_array_4, [1, 2]) + + # pd_op.reshape: (64x4x7x4x7x192xf32) <- (64x28x28x192xf32, 6xi64) + reshape_228 = paddle._C_ops.reshape(roll_2, full_int_array_43) + del full_int_array_43 + + # pd_op.transpose: (64x4x4x7x7x192xf32) <- (64x4x7x4x7x192xf32) + transpose_19 = paddle._C_ops.transpose(reshape_228, [0, 1, 3, 2, 4, 5]) + del reshape_228 + + # pd_op.reshape: (1024x7x7x192xf32) <- (64x4x4x7x7x192xf32, 4xi64) + reshape_29 = paddle._C_ops.reshape(transpose_19, full_int_array_44) + + # pd_op.reshape: (1024x49x192xf32) <- (1024x7x7x192xf32, 3xi64) + reshape_30 = paddle._C_ops.reshape(reshape_29, full_int_array_45) + del full_int_array_45 + + # pd_op.full: (1x28x28x1xf32) <- () + full_31 = paddle._C_ops.full( + [1, 28, 28, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__20 = paddle._C_ops.set_value_( + full_31, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_31 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__21 = paddle._C_ops.set_value_( + set_value__20, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__20 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__22 = paddle._C_ops.set_value_( + set_value__21, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__21 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__23 = paddle._C_ops.set_value_( + set_value__22, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__22 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__24 = paddle._C_ops.set_value_( + set_value__23, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__23 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__25 = paddle._C_ops.set_value_( + set_value__24, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__24 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__26 = paddle._C_ops.set_value_( + set_value__25, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__25 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__27 = paddle._C_ops.set_value_( + set_value__26, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__26 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__1 = paddle._C_ops.set_value_( + set_value__27, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__27 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_51 = [1, 4, 7, 4, 7, 1] + + # pd_op.reshape: (1x4x7x4x7x1xf32) <- (1x28x28x1xf32, 6xi64) + reshape_229 = paddle._C_ops.reshape(set_value__1, full_int_array_51) + del full_int_array_51 + + # pd_op.transpose: (1x4x4x7x7x1xf32) <- (1x4x7x4x7x1xf32) + transpose_147 = paddle._C_ops.transpose(reshape_229, [0, 1, 3, 2, 4, 5]) + del reshape_229 + + # pd_op.reshape: (16x7x7x1xf32) <- (1x4x4x7x7x1xf32, 4xi64) + reshape_230 = paddle._C_ops.reshape(transpose_147, full_int_array_36) + del transpose_147 + + # pd_op.reshape: (16x49xf32) <- (16x7x7x1xf32, 2xi64) + reshape_231 = paddle._C_ops.reshape(reshape_230, full_int_array_37) + del reshape_230 + + # pd_op.unsqueeze: (16x1x49xf32) <- (16x49xf32, 1xi64) + unsqueeze_40 = paddle._C_ops.unsqueeze(reshape_231, full_int_array_1) + + # pd_op.unsqueeze: (16x49x1xf32) <- (16x49xf32, 1xi64) + unsqueeze_41 = paddle._C_ops.unsqueeze(reshape_231, full_int_array_2) + del reshape_231 + + # pd_op.subtract: (16x49x49xf32) <- (16x1x49xf32, 16x49x1xf32) + subtract_1 = paddle._C_ops.subtract(unsqueeze_40, unsqueeze_41) + del unsqueeze_40, unsqueeze_41 + + # pd_op.not_equal: (16x49x49xb) <- (16x49x49xf32, xf32) + not_equal_1 = paddle._C_ops.not_equal(subtract_1, full_26) + + # pd_op.full: (16x49x49xf32) <- () + full_32 = paddle._C_ops.full( + [16, 49, 49], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (16x49x49xf32) <- (16x49x49xb, 16x49x49xf32, 16x49x49xf32) + where_2 = paddle._C_ops.where(not_equal_1, full_32, subtract_1) + del full_32, not_equal_1, subtract_1 + + # pd_op.equal: (16x49x49xb) <- (16x49x49xf32, xf32) + equal_1 = paddle._C_ops.equal(where_2, full_26) + + # pd_op.full: (16x49x49xf32) <- () + full_33 = paddle._C_ops.full( + [16, 49, 49], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (16x49x49xf32) <- (16x49x49xb, 16x49x49xf32, 16x49x49xf32) + where_3 = paddle._C_ops.where(equal_1, full_33, where_2) + del equal_1, full_33, where_2 + + # pd_op.matmul: (1024x49x576xf32) <- (1024x49x192xf32, 192x576xf32) + matmul_16 = paddle._C_ops.matmul(reshape_30, parameter_259, False, False) + del parameter_259 + + # pd_op.add: (1024x49x576xf32) <- (1024x49x576xf32, 576xf32) + add_21 = paddle._C_ops.add(matmul_16, parameter_258) + del parameter_258 + + # pd_op.reshape: (1024x49x3x6x32xf32) <- (1024x49x576xf32, 5xi64) + reshape_232 = paddle._C_ops.reshape(add_21, full_int_array_46) + del full_int_array_46 + + # pd_op.transpose: (3x1024x6x49x32xf32) <- (1024x49x3x6x32xf32) + transpose_20 = paddle._C_ops.transpose(reshape_232, [2, 0, 3, 1, 4]) + del reshape_232 + + # pd_op.slice: (1024x6x49x32xf32) <- (3x1024x6x49x32xf32, 1xi64, 1xi64) + slice_30 = paddle._C_ops.slice( + transpose_20, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (1024x6x49x32xf32) <- (3x1024x6x49x32xf32, 1xi64, 1xi64) + slice_31 = paddle._C_ops.slice( + transpose_20, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (1024x6x49x32xf32) <- (3x1024x6x49x32xf32, 1xi64, 1xi64) + slice_3 = paddle._C_ops.slice( + transpose_20, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (1024x6x49x32xf32) <- (1024x6x49x32xf32, 1xf32) + scale_3 = paddle._C_ops.scale(slice_30, full_0, float("0"), True) + del slice_30 + + # pd_op.transpose: (1024x6x32x49xf32) <- (1024x6x49x32xf32) + transpose_21 = paddle._C_ops.transpose(slice_31, [0, 1, 3, 2]) + del slice_31 + + # pd_op.matmul: (1024x6x49x49xf32) <- (1024x6x49x32xf32, 1024x6x32x49xf32) + matmul_17 = paddle._C_ops.matmul(scale_3, transpose_21, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_31 = paddle._C_ops.reshape(data_25, full_int_array_15) + del data_25 + + # pd_op.index_select: (2401x6xf32) <- (169x6xf32, 2401xi64) + index_select_3 = paddle._C_ops.index_select(data_26, reshape_31, 0) + del data_26 + + # pd_op.reshape: (49x49x6xf32) <- (2401x6xf32, 3xi64) + reshape_233 = paddle._C_ops.reshape(index_select_3, full_int_array_16) + + # pd_op.transpose: (6x49x49xf32) <- (49x49x6xf32) + transpose_22 = paddle._C_ops.transpose(reshape_233, [2, 0, 1]) + del reshape_233 + + # pd_op.unsqueeze: (1x6x49x49xf32) <- (6x49x49xf32, 1xi64) + unsqueeze_4 = paddle._C_ops.unsqueeze(transpose_22, full_int_array_0) + + # pd_op.add: (1024x6x49x49xf32) <- (1024x6x49x49xf32, 1x6x49x49xf32) + add_22 = paddle._C_ops.add(matmul_17, unsqueeze_4) + + # pd_op.full_int_array: (5xi64) <- () + full_int_array_52 = [64, 16, 6, 49, 49] + + # pd_op.reshape: (64x16x6x49x49xf32) <- (1024x6x49x49xf32, 5xi64) + reshape_32 = paddle._C_ops.reshape(add_22, full_int_array_52) + del full_int_array_52 + + # pd_op.unsqueeze: (16x1x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_42 = paddle._C_ops.unsqueeze(where_3, full_int_array_1) + del where_3 + + # pd_op.unsqueeze: (1x16x1x49x49xf32) <- (16x1x49x49xf32, 1xi64) + unsqueeze_5 = paddle._C_ops.unsqueeze(unsqueeze_42, full_int_array_0) + del unsqueeze_42 + + # pd_op.add: (64x16x6x49x49xf32) <- (64x16x6x49x49xf32, 1x16x1x49x49xf32) + add_23 = paddle._C_ops.add(reshape_32, unsqueeze_5) + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_53 = [1024, 6, 49, 49] + + # pd_op.reshape: (1024x6x49x49xf32) <- (64x16x6x49x49xf32, 4xi64) + reshape_234 = paddle._C_ops.reshape(add_23, full_int_array_53) + del full_int_array_53 + + # pd_op.softmax: (1024x6x49x49xf32) <- (1024x6x49x49xf32) + softmax_3 = paddle._C_ops.softmax(reshape_234, -1) + del reshape_234 + + # pd_op.matmul: (1024x6x49x32xf32) <- (1024x6x49x49xf32, 1024x6x49x32xf32) + matmul_127 = paddle._C_ops.matmul(softmax_3, slice_3, False, False) + + # pd_op.transpose: (1024x49x6x32xf32) <- (1024x6x49x32xf32) + transpose_23 = paddle._C_ops.transpose(matmul_127, [0, 2, 1, 3]) + del matmul_127 + + # pd_op.reshape: (1024x49x192xf32) <- (1024x49x6x32xf32, 3xi64) + reshape_33 = paddle._C_ops.reshape(transpose_23, full_int_array_47) + del full_int_array_47 + + # pd_op.matmul: (1024x49x192xf32) <- (1024x49x192xf32, 192x192xf32) + matmul_18 = paddle._C_ops.matmul(reshape_33, parameter_257, False, False) + del parameter_257 + + # pd_op.add: (1024x49x192xf32) <- (1024x49x192xf32, 192xf32) + add_24 = paddle._C_ops.add(matmul_18, parameter_256) + del parameter_256 + + # pd_op.reshape: (1024x7x7x192xf32) <- (1024x49x192xf32, 4xi64) + reshape_34 = paddle._C_ops.reshape(add_24, full_int_array_44) + del full_int_array_44 + + # pd_op.reshape: (64x4x4x7x7x192xf32) <- (1024x7x7x192xf32, 6xi64) + reshape_235 = paddle._C_ops.reshape(reshape_34, full_int_array_48) + del full_int_array_48 + + # pd_op.transpose: (64x4x7x4x7x192xf32) <- (64x4x4x7x7x192xf32) + transpose_24 = paddle._C_ops.transpose(reshape_235, [0, 1, 3, 2, 4, 5]) + del reshape_235 + + # pd_op.reshape: (64x28x28x192xf32) <- (64x4x7x4x7x192xf32, 4xi64) + reshape_35 = paddle._C_ops.reshape(transpose_24, full_int_array_49) + del full_int_array_49 + + # pd_op.roll: (64x28x28x192xf32) <- (64x28x28x192xf32, 2xi64) + roll_3 = paddle._C_ops.roll(reshape_35, full_int_array_5, [1, 2]) + + # pd_op.reshape: (64x784x192xf32) <- (64x28x28x192xf32, 3xi64) + reshape_36 = paddle._C_ops.reshape(roll_3, full_int_array_50) + del full_int_array_50 + + # pd_op.full: (xf32) <- () + full_4 = paddle._C_ops.full( + [], + float("0.96087"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_40 = full_4 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_4 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_176 = paddle._C_ops.add(full_4, uniform_4) + del uniform_4 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_4 = paddle._C_ops.floor(add_176) + del add_176 + + # pd_op.divide: (64x784x192xf32) <- (64x784x192xf32, xf32) + divide_4 = paddle._C_ops.divide(reshape_36, full_4) + + # pd_op.multiply: (64x784x192xf32) <- (64x784x192xf32, 64x1x1xf32) + multiply_4 = paddle._C_ops.multiply(divide_4, floor_4) + + # pd_op.add: (64x784x192xf32) <- (64x784x192xf32, 64x784x192xf32) + add_25 = paddle._C_ops.add(add_20, multiply_4) + + # pd_op.layer_norm: (64x784x192xf32, 64x784xf32, 64x784xf32) <- (64x784x192xf32, 192xf32, 192xf32) + layer_norm_27, layer_norm_28, layer_norm_29 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_25, parameter_255, parameter_254, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_254, parameter_255 + + # pd_op.matmul: (64x784x768xf32) <- (64x784x192xf32, 192x768xf32) + matmul_19 = paddle._C_ops.matmul(layer_norm_27, parameter_253, False, False) + del parameter_253 + + # pd_op.add: (64x784x768xf32) <- (64x784x768xf32, 768xf32) + add_26 = paddle._C_ops.add(matmul_19, parameter_252) + del parameter_252 + + # pd_op.gelu: (64x784x768xf32) <- (64x784x768xf32) + gelu_3 = paddle._C_ops.gelu(add_26, False) + + # pd_op.matmul: (64x784x192xf32) <- (64x784x768xf32, 768x192xf32) + matmul_20 = paddle._C_ops.matmul(gelu_3, parameter_251, False, False) + del parameter_251 + + # pd_op.add: (64x784x192xf32) <- (64x784x192xf32, 192xf32) + add_27 = paddle._C_ops.add(matmul_20, parameter_250) + del parameter_250 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_5 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_177 = paddle._C_ops.add(full_4, uniform_5) + del uniform_5 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_5 = paddle._C_ops.floor(add_177) + del add_177 + + # pd_op.divide: (64x784x192xf32) <- (64x784x192xf32, xf32) + divide_5 = paddle._C_ops.divide(add_27, full_4) + + # pd_op.multiply: (64x784x192xf32) <- (64x784x192xf32, 64x1x1xf32) + multiply_5 = paddle._C_ops.multiply(divide_5, floor_5) + + # pd_op.add: (64x784x192xf32) <- (64x784x192xf32, 64x784x192xf32) + add_28 = paddle._C_ops.add(add_25, multiply_5) + + # pd_op.reshape: (64x28x28x192xf32) <- (64x784x192xf32, 4xi64) + reshape_37 = paddle._C_ops.reshape(add_28, full_int_array_42) + + # pd_op.strided_slice: (64x14x14x192xf32) <- (64x28x28x192xf32, 2xi64, 2xi64, 2xi64) + strided_slice_4 = paddle._C_ops.strided_slice( + reshape_37, [1, 2], full_int_array_21, full_int_array_34, full_int_array_6 + ) + + # pd_op.strided_slice: (64x14x14x192xf32) <- (64x28x28x192xf32, 2xi64, 2xi64, 2xi64) + strided_slice_5 = paddle._C_ops.strided_slice( + reshape_37, [1, 2], full_int_array_7, full_int_array_34, full_int_array_6 + ) + + # pd_op.strided_slice: (64x14x14x192xf32) <- (64x28x28x192xf32, 2xi64, 2xi64, 2xi64) + strided_slice_6 = paddle._C_ops.strided_slice( + reshape_37, [1, 2], full_int_array_8, full_int_array_34, full_int_array_6 + ) + + # pd_op.strided_slice: (64x14x14x192xf32) <- (64x28x28x192xf32, 2xi64, 2xi64, 2xi64) + strided_slice_7 = paddle._C_ops.strided_slice( + reshape_37, [1, 2], full_int_array_23, full_int_array_34, full_int_array_6 + ) + + # pd_op.reshape: (64x28x28x192xf32) <- (64x28x28x192xf32, 4xi64) + reshape_236 = paddle._C_ops.reshape(reshape_37, full_int_array_42) + del full_int_array_42 + + # builtin.combine: ([64x14x14x192xf32, 64x14x14x192xf32, 64x14x14x192xf32, 64x14x14x192xf32]) <- (64x14x14x192xf32, 64x14x14x192xf32, 64x14x14x192xf32, 64x14x14x192xf32) + combine_1 = [strided_slice_4, strided_slice_5, strided_slice_6, strided_slice_7] + + # pd_op.concat: (64x14x14x768xf32) <- ([64x14x14x192xf32, 64x14x14x192xf32, 64x14x14x192xf32, 64x14x14x192xf32], 1xi32) + concat_1 = paddle._C_ops.concat(combine_1, full_2) + del combine_1 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_54 = [64, -1, 768] + + # pd_op.reshape: (64x196x768xf32) <- (64x14x14x768xf32, 3xi64) + reshape_38 = paddle._C_ops.reshape(concat_1, full_int_array_54) + del full_int_array_54 + + # pd_op.layer_norm: (64x196x768xf32, 64x196xf32, 64x196xf32) <- (64x196x768xf32, 768xf32, 768xf32) + layer_norm_30, layer_norm_31, layer_norm_32 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + reshape_38, parameter_249, parameter_248, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_248, parameter_249 + + # pd_op.matmul: (64x196x384xf32) <- (64x196x768xf32, 768x384xf32) + matmul_21 = paddle._C_ops.matmul(layer_norm_30, parameter_247, False, False) + del parameter_247 + + # pd_op.layer_norm: (64x196x384xf32, 64x196xf32, 64x196xf32) <- (64x196x384xf32, 384xf32, 384xf32) + layer_norm_33, layer_norm_34, layer_norm_35 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + matmul_21, parameter_246, parameter_245, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_245, parameter_246 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_55 = [64, 14, 14, 384] + + # pd_op.reshape: (64x14x14x384xf32) <- (64x196x384xf32, 4xi64) + reshape_39 = paddle._C_ops.reshape(layer_norm_33, full_int_array_55) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_56 = [64, 2, 7, 2, 7, 384] + + # pd_op.reshape: (64x2x7x2x7x384xf32) <- (64x14x14x384xf32, 6xi64) + reshape_237 = paddle._C_ops.reshape(reshape_39, full_int_array_56) + + # pd_op.transpose: (64x2x2x7x7x384xf32) <- (64x2x7x2x7x384xf32) + transpose_25 = paddle._C_ops.transpose(reshape_237, [0, 1, 3, 2, 4, 5]) + del reshape_237 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_57 = [-1, 7, 7, 384] + + # pd_op.reshape: (256x7x7x384xf32) <- (64x2x2x7x7x384xf32, 4xi64) + reshape_40 = paddle._C_ops.reshape(transpose_25, full_int_array_57) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_58 = [-1, 49, 384] + + # pd_op.reshape: (256x49x384xf32) <- (256x7x7x384xf32, 3xi64) + reshape_41 = paddle._C_ops.reshape(reshape_40, full_int_array_58) + + # pd_op.matmul: (256x49x1152xf32) <- (256x49x384xf32, 384x1152xf32) + matmul_22 = paddle._C_ops.matmul(reshape_41, parameter_244, False, False) + del parameter_244 + + # pd_op.add: (256x49x1152xf32) <- (256x49x1152xf32, 1152xf32) + add_29 = paddle._C_ops.add(matmul_22, parameter_243) + del parameter_243 + + # pd_op.full_int_array: (5xi64) <- () + full_int_array_59 = [256, 49, 3, 12, 32] + + # pd_op.reshape: (256x49x3x12x32xf32) <- (256x49x1152xf32, 5xi64) + reshape_238 = paddle._C_ops.reshape(add_29, full_int_array_59) + + # pd_op.transpose: (3x256x12x49x32xf32) <- (256x49x3x12x32xf32) + transpose_26 = paddle._C_ops.transpose(reshape_238, [2, 0, 3, 1, 4]) + del reshape_238 + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_32 = paddle._C_ops.slice( + transpose_26, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_33 = paddle._C_ops.slice( + transpose_26, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_4 = paddle._C_ops.slice( + transpose_26, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (256x12x49x32xf32) <- (256x12x49x32xf32, 1xf32) + scale_4 = paddle._C_ops.scale(slice_32, full_0, float("0"), True) + del slice_32 + + # pd_op.transpose: (256x12x32x49xf32) <- (256x12x49x32xf32) + transpose_27 = paddle._C_ops.transpose(slice_33, [0, 1, 3, 2]) + del slice_33 + + # pd_op.matmul: (256x12x49x49xf32) <- (256x12x49x32xf32, 256x12x32x49xf32) + matmul_23 = paddle._C_ops.matmul(scale_4, transpose_27, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_42 = paddle._C_ops.reshape(data_27, full_int_array_15) + del data_27 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_4 = paddle._C_ops.index_select(data_28, reshape_42, 0) + del data_28 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_239 = paddle._C_ops.reshape(index_select_4, full_int_array_16) + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_28 = paddle._C_ops.transpose(reshape_239, [2, 0, 1]) + del reshape_239 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_6 = paddle._C_ops.unsqueeze(transpose_28, full_int_array_0) + + # pd_op.add: (256x12x49x49xf32) <- (256x12x49x49xf32, 1x12x49x49xf32) + add_178 = paddle._C_ops.add(matmul_23, unsqueeze_6) + + # pd_op.softmax: (256x12x49x49xf32) <- (256x12x49x49xf32) + softmax_4 = paddle._C_ops.softmax(add_178, -1) + del add_178 + + # pd_op.matmul: (256x12x49x32xf32) <- (256x12x49x49xf32, 256x12x49x32xf32) + matmul_128 = paddle._C_ops.matmul(softmax_4, slice_4, False, False) + + # pd_op.transpose: (256x49x12x32xf32) <- (256x12x49x32xf32) + transpose_29 = paddle._C_ops.transpose(matmul_128, [0, 2, 1, 3]) + del matmul_128 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_60 = [256, 49, 384] + + # pd_op.reshape: (256x49x384xf32) <- (256x49x12x32xf32, 3xi64) + reshape_43 = paddle._C_ops.reshape(transpose_29, full_int_array_60) + + # pd_op.matmul: (256x49x384xf32) <- (256x49x384xf32, 384x384xf32) + matmul_24 = paddle._C_ops.matmul(reshape_43, parameter_242, False, False) + del parameter_242 + + # pd_op.add: (256x49x384xf32) <- (256x49x384xf32, 384xf32) + add_30 = paddle._C_ops.add(matmul_24, parameter_241) + del parameter_241 + + # pd_op.reshape: (256x7x7x384xf32) <- (256x49x384xf32, 4xi64) + reshape_44 = paddle._C_ops.reshape(add_30, full_int_array_57) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_61 = [-1, 2, 2, 7, 7, 384] + + # pd_op.reshape: (64x2x2x7x7x384xf32) <- (256x7x7x384xf32, 6xi64) + reshape_240 = paddle._C_ops.reshape(reshape_44, full_int_array_61) + + # pd_op.transpose: (64x2x7x2x7x384xf32) <- (64x2x2x7x7x384xf32) + transpose_30 = paddle._C_ops.transpose(reshape_240, [0, 1, 3, 2, 4, 5]) + del reshape_240 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_62 = [-1, 14, 14, 384] + + # pd_op.reshape: (64x14x14x384xf32) <- (64x2x7x2x7x384xf32, 4xi64) + reshape_45 = paddle._C_ops.reshape(transpose_30, full_int_array_62) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_63 = [64, 196, 384] + + # pd_op.reshape: (64x196x384xf32) <- (64x14x14x384xf32, 3xi64) + reshape_46 = paddle._C_ops.reshape(reshape_45, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_5 = paddle._C_ops.full( + [], + float("0.947826"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_62 = full_5 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_6 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_179 = paddle._C_ops.add(full_5, uniform_6) + del uniform_6 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_6 = paddle._C_ops.floor(add_179) + del add_179 + + # pd_op.divide: (64x196x384xf32) <- (64x196x384xf32, xf32) + divide_6 = paddle._C_ops.divide(reshape_46, full_5) + + # pd_op.multiply: (64x196x384xf32) <- (64x196x384xf32, 64x1x1xf32) + multiply_6 = paddle._C_ops.multiply(divide_6, floor_6) + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 64x196x384xf32) + add_31 = paddle._C_ops.add(matmul_21, multiply_6) + + # pd_op.layer_norm: (64x196x384xf32, 64x196xf32, 64x196xf32) <- (64x196x384xf32, 384xf32, 384xf32) + layer_norm_36, layer_norm_37, layer_norm_38 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_31, parameter_240, parameter_239, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_239, parameter_240 + + # pd_op.matmul: (64x196x1536xf32) <- (64x196x384xf32, 384x1536xf32) + matmul_25 = paddle._C_ops.matmul(layer_norm_36, parameter_238, False, False) + del parameter_238 + + # pd_op.add: (64x196x1536xf32) <- (64x196x1536xf32, 1536xf32) + add_32 = paddle._C_ops.add(matmul_25, parameter_237) + del parameter_237 + + # pd_op.gelu: (64x196x1536xf32) <- (64x196x1536xf32) + gelu_4 = paddle._C_ops.gelu(add_32, False) + + # pd_op.matmul: (64x196x384xf32) <- (64x196x1536xf32, 1536x384xf32) + matmul_26 = paddle._C_ops.matmul(gelu_4, parameter_236, False, False) + del parameter_236 + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 384xf32) + add_33 = paddle._C_ops.add(matmul_26, parameter_235) + del parameter_235 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_7 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_180 = paddle._C_ops.add(full_5, uniform_7) + del uniform_7 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_7 = paddle._C_ops.floor(add_180) + del add_180 + + # pd_op.divide: (64x196x384xf32) <- (64x196x384xf32, xf32) + divide_7 = paddle._C_ops.divide(add_33, full_5) + + # pd_op.multiply: (64x196x384xf32) <- (64x196x384xf32, 64x1x1xf32) + multiply_7 = paddle._C_ops.multiply(divide_7, floor_7) + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 64x196x384xf32) + add_34 = paddle._C_ops.add(add_31, multiply_7) + + # pd_op.layer_norm: (64x196x384xf32, 64x196xf32, 64x196xf32) <- (64x196x384xf32, 384xf32, 384xf32) + layer_norm_39, layer_norm_40, layer_norm_41 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_34, parameter_234, parameter_233, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_233, parameter_234 + + # pd_op.reshape: (64x14x14x384xf32) <- (64x196x384xf32, 4xi64) + reshape_47 = paddle._C_ops.reshape(layer_norm_39, full_int_array_55) + + # pd_op.roll: (64x14x14x384xf32) <- (64x14x14x384xf32, 2xi64) + roll_4 = paddle._C_ops.roll(reshape_47, full_int_array_4, [1, 2]) + + # pd_op.reshape: (64x2x7x2x7x384xf32) <- (64x14x14x384xf32, 6xi64) + reshape_241 = paddle._C_ops.reshape(roll_4, full_int_array_56) + + # pd_op.transpose: (64x2x2x7x7x384xf32) <- (64x2x7x2x7x384xf32) + transpose_31 = paddle._C_ops.transpose(reshape_241, [0, 1, 3, 2, 4, 5]) + del reshape_241 + + # pd_op.reshape: (256x7x7x384xf32) <- (64x2x2x7x7x384xf32, 4xi64) + reshape_48 = paddle._C_ops.reshape(transpose_31, full_int_array_57) + + # pd_op.reshape: (256x49x384xf32) <- (256x7x7x384xf32, 3xi64) + reshape_49 = paddle._C_ops.reshape(reshape_48, full_int_array_58) + + # pd_op.full: (1x14x14x1xf32) <- () + full_34 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__28 = paddle._C_ops.set_value_( + full_34, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_34 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__29 = paddle._C_ops.set_value_( + set_value__28, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__28 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__30 = paddle._C_ops.set_value_( + set_value__29, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__29 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__31 = paddle._C_ops.set_value_( + set_value__30, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__30 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__32 = paddle._C_ops.set_value_( + set_value__31, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__31 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__33 = paddle._C_ops.set_value_( + set_value__32, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__32 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__34 = paddle._C_ops.set_value_( + set_value__33, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__33 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__35 = paddle._C_ops.set_value_( + set_value__34, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__34 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__2 = paddle._C_ops.set_value_( + set_value__35, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__35 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_64 = [1, 2, 7, 2, 7, 1] + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_242 = paddle._C_ops.reshape(set_value__2, full_int_array_64) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_148 = paddle._C_ops.transpose(reshape_242, [0, 1, 3, 2, 4, 5]) + del reshape_242 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_243 = paddle._C_ops.reshape(transpose_148, full_int_array_36) + del transpose_148 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_244 = paddle._C_ops.reshape(reshape_243, full_int_array_37) + del reshape_243 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_43 = paddle._C_ops.unsqueeze(reshape_244, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_44 = paddle._C_ops.unsqueeze(reshape_244, full_int_array_2) + del reshape_244 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_2 = paddle._C_ops.subtract(unsqueeze_43, unsqueeze_44) + del unsqueeze_43, unsqueeze_44 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_2 = paddle._C_ops.not_equal(subtract_2, full_26) + + # pd_op.full: (4x49x49xf32) <- () + full_35 = paddle._C_ops.full( + [4, 49, 49], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_4 = paddle._C_ops.where(not_equal_2, full_35, subtract_2) + del not_equal_2, subtract_2 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_2 = paddle._C_ops.equal(where_4, full_26) + + # pd_op.full: (4x49x49xf32) <- () + full_36 = paddle._C_ops.full( + [4, 49, 49], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_5 = paddle._C_ops.where(equal_2, full_36, where_4) + del equal_2, where_4 + + # pd_op.matmul: (256x49x1152xf32) <- (256x49x384xf32, 384x1152xf32) + matmul_27 = paddle._C_ops.matmul(reshape_49, parameter_232, False, False) + del parameter_232 + + # pd_op.add: (256x49x1152xf32) <- (256x49x1152xf32, 1152xf32) + add_35 = paddle._C_ops.add(matmul_27, parameter_231) + del parameter_231 + + # pd_op.reshape: (256x49x3x12x32xf32) <- (256x49x1152xf32, 5xi64) + reshape_245 = paddle._C_ops.reshape(add_35, full_int_array_59) + + # pd_op.transpose: (3x256x12x49x32xf32) <- (256x49x3x12x32xf32) + transpose_32 = paddle._C_ops.transpose(reshape_245, [2, 0, 3, 1, 4]) + del reshape_245 + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_34 = paddle._C_ops.slice( + transpose_32, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_35 = paddle._C_ops.slice( + transpose_32, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_5 = paddle._C_ops.slice( + transpose_32, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (256x12x49x32xf32) <- (256x12x49x32xf32, 1xf32) + scale_5 = paddle._C_ops.scale(slice_34, full_0, float("0"), True) + del slice_34 + + # pd_op.transpose: (256x12x32x49xf32) <- (256x12x49x32xf32) + transpose_33 = paddle._C_ops.transpose(slice_35, [0, 1, 3, 2]) + del slice_35 + + # pd_op.matmul: (256x12x49x49xf32) <- (256x12x49x32xf32, 256x12x32x49xf32) + matmul_28 = paddle._C_ops.matmul(scale_5, transpose_33, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_50 = paddle._C_ops.reshape(data_29, full_int_array_15) + del data_29 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_5 = paddle._C_ops.index_select(data_30, reshape_50, 0) + del data_30 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_246 = paddle._C_ops.reshape(index_select_5, full_int_array_16) + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_34 = paddle._C_ops.transpose(reshape_246, [2, 0, 1]) + del reshape_246 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_7 = paddle._C_ops.unsqueeze(transpose_34, full_int_array_0) + + # pd_op.add: (256x12x49x49xf32) <- (256x12x49x49xf32, 1x12x49x49xf32) + add_36 = paddle._C_ops.add(matmul_28, unsqueeze_7) + + # pd_op.full_int_array: (5xi64) <- () + full_int_array_65 = [64, 4, 12, 49, 49] + + # pd_op.reshape: (64x4x12x49x49xf32) <- (256x12x49x49xf32, 5xi64) + reshape_51 = paddle._C_ops.reshape(add_36, full_int_array_65) + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_45 = paddle._C_ops.unsqueeze(where_5, full_int_array_1) + del where_5 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_8 = paddle._C_ops.unsqueeze(unsqueeze_45, full_int_array_0) + del unsqueeze_45 + + # pd_op.add: (64x4x12x49x49xf32) <- (64x4x12x49x49xf32, 1x4x1x49x49xf32) + add_37 = paddle._C_ops.add(reshape_51, unsqueeze_8) + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_66 = [256, 12, 49, 49] + + # pd_op.reshape: (256x12x49x49xf32) <- (64x4x12x49x49xf32, 4xi64) + reshape_247 = paddle._C_ops.reshape(add_37, full_int_array_66) + + # pd_op.softmax: (256x12x49x49xf32) <- (256x12x49x49xf32) + softmax_5 = paddle._C_ops.softmax(reshape_247, -1) + del reshape_247 + + # pd_op.matmul: (256x12x49x32xf32) <- (256x12x49x49xf32, 256x12x49x32xf32) + matmul_129 = paddle._C_ops.matmul(softmax_5, slice_5, False, False) + + # pd_op.transpose: (256x49x12x32xf32) <- (256x12x49x32xf32) + transpose_35 = paddle._C_ops.transpose(matmul_129, [0, 2, 1, 3]) + del matmul_129 + + # pd_op.reshape: (256x49x384xf32) <- (256x49x12x32xf32, 3xi64) + reshape_52 = paddle._C_ops.reshape(transpose_35, full_int_array_60) + + # pd_op.matmul: (256x49x384xf32) <- (256x49x384xf32, 384x384xf32) + matmul_29 = paddle._C_ops.matmul(reshape_52, parameter_230, False, False) + del parameter_230 + + # pd_op.add: (256x49x384xf32) <- (256x49x384xf32, 384xf32) + add_38 = paddle._C_ops.add(matmul_29, parameter_229) + del parameter_229 + + # pd_op.reshape: (256x7x7x384xf32) <- (256x49x384xf32, 4xi64) + reshape_53 = paddle._C_ops.reshape(add_38, full_int_array_57) + + # pd_op.reshape: (64x2x2x7x7x384xf32) <- (256x7x7x384xf32, 6xi64) + reshape_248 = paddle._C_ops.reshape(reshape_53, full_int_array_61) + + # pd_op.transpose: (64x2x7x2x7x384xf32) <- (64x2x2x7x7x384xf32) + transpose_36 = paddle._C_ops.transpose(reshape_248, [0, 1, 3, 2, 4, 5]) + del reshape_248 + + # pd_op.reshape: (64x14x14x384xf32) <- (64x2x7x2x7x384xf32, 4xi64) + reshape_54 = paddle._C_ops.reshape(transpose_36, full_int_array_62) + + # pd_op.roll: (64x14x14x384xf32) <- (64x14x14x384xf32, 2xi64) + roll_5 = paddle._C_ops.roll(reshape_54, full_int_array_5, [1, 2]) + + # pd_op.reshape: (64x196x384xf32) <- (64x14x14x384xf32, 3xi64) + reshape_55 = paddle._C_ops.reshape(roll_5, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_6 = paddle._C_ops.full( + [], + float("0.934783"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_73 = full_6 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_8 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_181 = paddle._C_ops.add(full_6, uniform_8) + del uniform_8 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_8 = paddle._C_ops.floor(add_181) + del add_181 + + # pd_op.divide: (64x196x384xf32) <- (64x196x384xf32, xf32) + divide_8 = paddle._C_ops.divide(reshape_55, full_6) + + # pd_op.multiply: (64x196x384xf32) <- (64x196x384xf32, 64x1x1xf32) + multiply_8 = paddle._C_ops.multiply(divide_8, floor_8) + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 64x196x384xf32) + add_39 = paddle._C_ops.add(add_34, multiply_8) + + # pd_op.layer_norm: (64x196x384xf32, 64x196xf32, 64x196xf32) <- (64x196x384xf32, 384xf32, 384xf32) + layer_norm_42, layer_norm_43, layer_norm_44 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_39, parameter_228, parameter_227, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_227, parameter_228 + + # pd_op.matmul: (64x196x1536xf32) <- (64x196x384xf32, 384x1536xf32) + matmul_30 = paddle._C_ops.matmul(layer_norm_42, parameter_226, False, False) + del parameter_226 + + # pd_op.add: (64x196x1536xf32) <- (64x196x1536xf32, 1536xf32) + add_40 = paddle._C_ops.add(matmul_30, parameter_225) + del parameter_225 + + # pd_op.gelu: (64x196x1536xf32) <- (64x196x1536xf32) + gelu_5 = paddle._C_ops.gelu(add_40, False) + + # pd_op.matmul: (64x196x384xf32) <- (64x196x1536xf32, 1536x384xf32) + matmul_31 = paddle._C_ops.matmul(gelu_5, parameter_224, False, False) + del parameter_224 + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 384xf32) + add_41 = paddle._C_ops.add(matmul_31, parameter_223) + del parameter_223 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_9 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_182 = paddle._C_ops.add(full_6, uniform_9) + del uniform_9 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_9 = paddle._C_ops.floor(add_182) + del add_182 + + # pd_op.divide: (64x196x384xf32) <- (64x196x384xf32, xf32) + divide_9 = paddle._C_ops.divide(add_41, full_6) + + # pd_op.multiply: (64x196x384xf32) <- (64x196x384xf32, 64x1x1xf32) + multiply_9 = paddle._C_ops.multiply(divide_9, floor_9) + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 64x196x384xf32) + add_42 = paddle._C_ops.add(add_39, multiply_9) + + # pd_op.layer_norm: (64x196x384xf32, 64x196xf32, 64x196xf32) <- (64x196x384xf32, 384xf32, 384xf32) + layer_norm_45, layer_norm_46, layer_norm_47 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_42, parameter_222, parameter_221, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_221, parameter_222 + + # pd_op.reshape: (64x14x14x384xf32) <- (64x196x384xf32, 4xi64) + reshape_56 = paddle._C_ops.reshape(layer_norm_45, full_int_array_55) + + # pd_op.reshape: (64x2x7x2x7x384xf32) <- (64x14x14x384xf32, 6xi64) + reshape_249 = paddle._C_ops.reshape(reshape_56, full_int_array_56) + + # pd_op.transpose: (64x2x2x7x7x384xf32) <- (64x2x7x2x7x384xf32) + transpose_37 = paddle._C_ops.transpose(reshape_249, [0, 1, 3, 2, 4, 5]) + del reshape_249 + + # pd_op.reshape: (256x7x7x384xf32) <- (64x2x2x7x7x384xf32, 4xi64) + reshape_57 = paddle._C_ops.reshape(transpose_37, full_int_array_57) + + # pd_op.reshape: (256x49x384xf32) <- (256x7x7x384xf32, 3xi64) + reshape_58 = paddle._C_ops.reshape(reshape_57, full_int_array_58) + + # pd_op.matmul: (256x49x1152xf32) <- (256x49x384xf32, 384x1152xf32) + matmul_32 = paddle._C_ops.matmul(reshape_58, parameter_220, False, False) + del parameter_220 + + # pd_op.add: (256x49x1152xf32) <- (256x49x1152xf32, 1152xf32) + add_43 = paddle._C_ops.add(matmul_32, parameter_219) + del parameter_219 + + # pd_op.reshape: (256x49x3x12x32xf32) <- (256x49x1152xf32, 5xi64) + reshape_250 = paddle._C_ops.reshape(add_43, full_int_array_59) + + # pd_op.transpose: (3x256x12x49x32xf32) <- (256x49x3x12x32xf32) + transpose_38 = paddle._C_ops.transpose(reshape_250, [2, 0, 3, 1, 4]) + del reshape_250 + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_36 = paddle._C_ops.slice( + transpose_38, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_37 = paddle._C_ops.slice( + transpose_38, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_6 = paddle._C_ops.slice( + transpose_38, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (256x12x49x32xf32) <- (256x12x49x32xf32, 1xf32) + scale_6 = paddle._C_ops.scale(slice_36, full_0, float("0"), True) + del slice_36 + + # pd_op.transpose: (256x12x32x49xf32) <- (256x12x49x32xf32) + transpose_39 = paddle._C_ops.transpose(slice_37, [0, 1, 3, 2]) + del slice_37 + + # pd_op.matmul: (256x12x49x49xf32) <- (256x12x49x32xf32, 256x12x32x49xf32) + matmul_33 = paddle._C_ops.matmul(scale_6, transpose_39, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_59 = paddle._C_ops.reshape(data_31, full_int_array_15) + del data_31 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_6 = paddle._C_ops.index_select(data_32, reshape_59, 0) + del data_32 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_251 = paddle._C_ops.reshape(index_select_6, full_int_array_16) + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_40 = paddle._C_ops.transpose(reshape_251, [2, 0, 1]) + del reshape_251 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_9 = paddle._C_ops.unsqueeze(transpose_40, full_int_array_0) + + # pd_op.add: (256x12x49x49xf32) <- (256x12x49x49xf32, 1x12x49x49xf32) + add_183 = paddle._C_ops.add(matmul_33, unsqueeze_9) + + # pd_op.softmax: (256x12x49x49xf32) <- (256x12x49x49xf32) + softmax_6 = paddle._C_ops.softmax(add_183, -1) + del add_183 + + # pd_op.matmul: (256x12x49x32xf32) <- (256x12x49x49xf32, 256x12x49x32xf32) + matmul_130 = paddle._C_ops.matmul(softmax_6, slice_6, False, False) + + # pd_op.transpose: (256x49x12x32xf32) <- (256x12x49x32xf32) + transpose_41 = paddle._C_ops.transpose(matmul_130, [0, 2, 1, 3]) + del matmul_130 + + # pd_op.reshape: (256x49x384xf32) <- (256x49x12x32xf32, 3xi64) + reshape_60 = paddle._C_ops.reshape(transpose_41, full_int_array_60) + + # pd_op.matmul: (256x49x384xf32) <- (256x49x384xf32, 384x384xf32) + matmul_34 = paddle._C_ops.matmul(reshape_60, parameter_218, False, False) + del parameter_218 + + # pd_op.add: (256x49x384xf32) <- (256x49x384xf32, 384xf32) + add_44 = paddle._C_ops.add(matmul_34, parameter_217) + del parameter_217 + + # pd_op.reshape: (256x7x7x384xf32) <- (256x49x384xf32, 4xi64) + reshape_61 = paddle._C_ops.reshape(add_44, full_int_array_57) + + # pd_op.reshape: (64x2x2x7x7x384xf32) <- (256x7x7x384xf32, 6xi64) + reshape_252 = paddle._C_ops.reshape(reshape_61, full_int_array_61) + + # pd_op.transpose: (64x2x7x2x7x384xf32) <- (64x2x2x7x7x384xf32) + transpose_42 = paddle._C_ops.transpose(reshape_252, [0, 1, 3, 2, 4, 5]) + del reshape_252 + + # pd_op.reshape: (64x14x14x384xf32) <- (64x2x7x2x7x384xf32, 4xi64) + reshape_62 = paddle._C_ops.reshape(transpose_42, full_int_array_62) + + # pd_op.reshape: (64x196x384xf32) <- (64x14x14x384xf32, 3xi64) + reshape_63 = paddle._C_ops.reshape(reshape_62, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_7 = paddle._C_ops.full( + [], + float("0.921739"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_82 = full_7 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_10 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_184 = paddle._C_ops.add(full_7, uniform_10) + del uniform_10 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_10 = paddle._C_ops.floor(add_184) + del add_184 + + # pd_op.divide: (64x196x384xf32) <- (64x196x384xf32, xf32) + divide_10 = paddle._C_ops.divide(reshape_63, full_7) + + # pd_op.multiply: (64x196x384xf32) <- (64x196x384xf32, 64x1x1xf32) + multiply_10 = paddle._C_ops.multiply(divide_10, floor_10) + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 64x196x384xf32) + add_45 = paddle._C_ops.add(add_42, multiply_10) + + # pd_op.layer_norm: (64x196x384xf32, 64x196xf32, 64x196xf32) <- (64x196x384xf32, 384xf32, 384xf32) + layer_norm_48, layer_norm_49, layer_norm_50 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_45, parameter_216, parameter_215, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_215, parameter_216 + + # pd_op.matmul: (64x196x1536xf32) <- (64x196x384xf32, 384x1536xf32) + matmul_35 = paddle._C_ops.matmul(layer_norm_48, parameter_214, False, False) + del parameter_214 + + # pd_op.add: (64x196x1536xf32) <- (64x196x1536xf32, 1536xf32) + add_46 = paddle._C_ops.add(matmul_35, parameter_213) + del parameter_213 + + # pd_op.gelu: (64x196x1536xf32) <- (64x196x1536xf32) + gelu_6 = paddle._C_ops.gelu(add_46, False) + + # pd_op.matmul: (64x196x384xf32) <- (64x196x1536xf32, 1536x384xf32) + matmul_36 = paddle._C_ops.matmul(gelu_6, parameter_212, False, False) + del parameter_212 + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 384xf32) + add_47 = paddle._C_ops.add(matmul_36, parameter_211) + del parameter_211 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_11 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_185 = paddle._C_ops.add(full_7, uniform_11) + del uniform_11 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_11 = paddle._C_ops.floor(add_185) + del add_185 + + # pd_op.divide: (64x196x384xf32) <- (64x196x384xf32, xf32) + divide_11 = paddle._C_ops.divide(add_47, full_7) + + # pd_op.multiply: (64x196x384xf32) <- (64x196x384xf32, 64x1x1xf32) + multiply_11 = paddle._C_ops.multiply(divide_11, floor_11) + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 64x196x384xf32) + add_48 = paddle._C_ops.add(add_45, multiply_11) + + # pd_op.layer_norm: (64x196x384xf32, 64x196xf32, 64x196xf32) <- (64x196x384xf32, 384xf32, 384xf32) + layer_norm_51, layer_norm_52, layer_norm_53 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_48, parameter_210, parameter_209, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_209, parameter_210 + + # pd_op.reshape: (64x14x14x384xf32) <- (64x196x384xf32, 4xi64) + reshape_64 = paddle._C_ops.reshape(layer_norm_51, full_int_array_55) + + # pd_op.roll: (64x14x14x384xf32) <- (64x14x14x384xf32, 2xi64) + roll_6 = paddle._C_ops.roll(reshape_64, full_int_array_4, [1, 2]) + + # pd_op.reshape: (64x2x7x2x7x384xf32) <- (64x14x14x384xf32, 6xi64) + reshape_253 = paddle._C_ops.reshape(roll_6, full_int_array_56) + + # pd_op.transpose: (64x2x2x7x7x384xf32) <- (64x2x7x2x7x384xf32) + transpose_43 = paddle._C_ops.transpose(reshape_253, [0, 1, 3, 2, 4, 5]) + del reshape_253 + + # pd_op.reshape: (256x7x7x384xf32) <- (64x2x2x7x7x384xf32, 4xi64) + reshape_65 = paddle._C_ops.reshape(transpose_43, full_int_array_57) + + # pd_op.reshape: (256x49x384xf32) <- (256x7x7x384xf32, 3xi64) + reshape_66 = paddle._C_ops.reshape(reshape_65, full_int_array_58) + + # pd_op.full: (1x14x14x1xf32) <- () + full_37 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__36 = paddle._C_ops.set_value_( + full_37, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_37 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__37 = paddle._C_ops.set_value_( + set_value__36, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__36 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__38 = paddle._C_ops.set_value_( + set_value__37, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__37 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__39 = paddle._C_ops.set_value_( + set_value__38, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__38 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__40 = paddle._C_ops.set_value_( + set_value__39, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__39 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__41 = paddle._C_ops.set_value_( + set_value__40, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__40 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__42 = paddle._C_ops.set_value_( + set_value__41, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__41 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__43 = paddle._C_ops.set_value_( + set_value__42, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__42 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__3 = paddle._C_ops.set_value_( + set_value__43, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__43 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_254 = paddle._C_ops.reshape(set_value__3, full_int_array_64) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_149 = paddle._C_ops.transpose(reshape_254, [0, 1, 3, 2, 4, 5]) + del reshape_254 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_255 = paddle._C_ops.reshape(transpose_149, full_int_array_36) + del transpose_149 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_256 = paddle._C_ops.reshape(reshape_255, full_int_array_37) + del reshape_255 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_46 = paddle._C_ops.unsqueeze(reshape_256, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_47 = paddle._C_ops.unsqueeze(reshape_256, full_int_array_2) + del reshape_256 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_3 = paddle._C_ops.subtract(unsqueeze_46, unsqueeze_47) + del unsqueeze_46, unsqueeze_47 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_3 = paddle._C_ops.not_equal(subtract_3, full_26) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_6 = paddle._C_ops.where(not_equal_3, full_35, subtract_3) + del not_equal_3, subtract_3 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_3 = paddle._C_ops.equal(where_6, full_26) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_7 = paddle._C_ops.where(equal_3, full_36, where_6) + del equal_3, where_6 + + # pd_op.matmul: (256x49x1152xf32) <- (256x49x384xf32, 384x1152xf32) + matmul_37 = paddle._C_ops.matmul(reshape_66, parameter_208, False, False) + del parameter_208 + + # pd_op.add: (256x49x1152xf32) <- (256x49x1152xf32, 1152xf32) + add_49 = paddle._C_ops.add(matmul_37, parameter_207) + del parameter_207 + + # pd_op.reshape: (256x49x3x12x32xf32) <- (256x49x1152xf32, 5xi64) + reshape_257 = paddle._C_ops.reshape(add_49, full_int_array_59) + + # pd_op.transpose: (3x256x12x49x32xf32) <- (256x49x3x12x32xf32) + transpose_44 = paddle._C_ops.transpose(reshape_257, [2, 0, 3, 1, 4]) + del reshape_257 + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_38 = paddle._C_ops.slice( + transpose_44, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_39 = paddle._C_ops.slice( + transpose_44, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_7 = paddle._C_ops.slice( + transpose_44, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (256x12x49x32xf32) <- (256x12x49x32xf32, 1xf32) + scale_7 = paddle._C_ops.scale(slice_38, full_0, float("0"), True) + del slice_38 + + # pd_op.transpose: (256x12x32x49xf32) <- (256x12x49x32xf32) + transpose_45 = paddle._C_ops.transpose(slice_39, [0, 1, 3, 2]) + del slice_39 + + # pd_op.matmul: (256x12x49x49xf32) <- (256x12x49x32xf32, 256x12x32x49xf32) + matmul_38 = paddle._C_ops.matmul(scale_7, transpose_45, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_67 = paddle._C_ops.reshape(data_33, full_int_array_15) + del data_33 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_7 = paddle._C_ops.index_select(data_34, reshape_67, 0) + del data_34 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_258 = paddle._C_ops.reshape(index_select_7, full_int_array_16) + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_46 = paddle._C_ops.transpose(reshape_258, [2, 0, 1]) + del reshape_258 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_10 = paddle._C_ops.unsqueeze(transpose_46, full_int_array_0) + + # pd_op.add: (256x12x49x49xf32) <- (256x12x49x49xf32, 1x12x49x49xf32) + add_50 = paddle._C_ops.add(matmul_38, unsqueeze_10) + + # pd_op.reshape: (64x4x12x49x49xf32) <- (256x12x49x49xf32, 5xi64) + reshape_68 = paddle._C_ops.reshape(add_50, full_int_array_65) + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_48 = paddle._C_ops.unsqueeze(where_7, full_int_array_1) + del where_7 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_11 = paddle._C_ops.unsqueeze(unsqueeze_48, full_int_array_0) + del unsqueeze_48 + + # pd_op.add: (64x4x12x49x49xf32) <- (64x4x12x49x49xf32, 1x4x1x49x49xf32) + add_51 = paddle._C_ops.add(reshape_68, unsqueeze_11) + + # pd_op.reshape: (256x12x49x49xf32) <- (64x4x12x49x49xf32, 4xi64) + reshape_259 = paddle._C_ops.reshape(add_51, full_int_array_66) + + # pd_op.softmax: (256x12x49x49xf32) <- (256x12x49x49xf32) + softmax_7 = paddle._C_ops.softmax(reshape_259, -1) + del reshape_259 + + # pd_op.matmul: (256x12x49x32xf32) <- (256x12x49x49xf32, 256x12x49x32xf32) + matmul_131 = paddle._C_ops.matmul(softmax_7, slice_7, False, False) + + # pd_op.transpose: (256x49x12x32xf32) <- (256x12x49x32xf32) + transpose_47 = paddle._C_ops.transpose(matmul_131, [0, 2, 1, 3]) + del matmul_131 + + # pd_op.reshape: (256x49x384xf32) <- (256x49x12x32xf32, 3xi64) + reshape_69 = paddle._C_ops.reshape(transpose_47, full_int_array_60) + + # pd_op.matmul: (256x49x384xf32) <- (256x49x384xf32, 384x384xf32) + matmul_39 = paddle._C_ops.matmul(reshape_69, parameter_206, False, False) + del parameter_206 + + # pd_op.add: (256x49x384xf32) <- (256x49x384xf32, 384xf32) + add_52 = paddle._C_ops.add(matmul_39, parameter_205) + del parameter_205 + + # pd_op.reshape: (256x7x7x384xf32) <- (256x49x384xf32, 4xi64) + reshape_70 = paddle._C_ops.reshape(add_52, full_int_array_57) + + # pd_op.reshape: (64x2x2x7x7x384xf32) <- (256x7x7x384xf32, 6xi64) + reshape_260 = paddle._C_ops.reshape(reshape_70, full_int_array_61) + + # pd_op.transpose: (64x2x7x2x7x384xf32) <- (64x2x2x7x7x384xf32) + transpose_48 = paddle._C_ops.transpose(reshape_260, [0, 1, 3, 2, 4, 5]) + del reshape_260 + + # pd_op.reshape: (64x14x14x384xf32) <- (64x2x7x2x7x384xf32, 4xi64) + reshape_71 = paddle._C_ops.reshape(transpose_48, full_int_array_62) + + # pd_op.roll: (64x14x14x384xf32) <- (64x14x14x384xf32, 2xi64) + roll_7 = paddle._C_ops.roll(reshape_71, full_int_array_5, [1, 2]) + + # pd_op.reshape: (64x196x384xf32) <- (64x14x14x384xf32, 3xi64) + reshape_72 = paddle._C_ops.reshape(roll_7, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_8 = paddle._C_ops.full( + [], + float("0.908696"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_93 = full_8 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_12 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_186 = paddle._C_ops.add(full_8, uniform_12) + del uniform_12 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_12 = paddle._C_ops.floor(add_186) + del add_186 + + # pd_op.divide: (64x196x384xf32) <- (64x196x384xf32, xf32) + divide_12 = paddle._C_ops.divide(reshape_72, full_8) + + # pd_op.multiply: (64x196x384xf32) <- (64x196x384xf32, 64x1x1xf32) + multiply_12 = paddle._C_ops.multiply(divide_12, floor_12) + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 64x196x384xf32) + add_53 = paddle._C_ops.add(add_48, multiply_12) + + # pd_op.layer_norm: (64x196x384xf32, 64x196xf32, 64x196xf32) <- (64x196x384xf32, 384xf32, 384xf32) + layer_norm_54, layer_norm_55, layer_norm_56 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_53, parameter_204, parameter_203, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_203, parameter_204 + + # pd_op.matmul: (64x196x1536xf32) <- (64x196x384xf32, 384x1536xf32) + matmul_40 = paddle._C_ops.matmul(layer_norm_54, parameter_202, False, False) + del parameter_202 + + # pd_op.add: (64x196x1536xf32) <- (64x196x1536xf32, 1536xf32) + add_54 = paddle._C_ops.add(matmul_40, parameter_201) + del parameter_201 + + # pd_op.gelu: (64x196x1536xf32) <- (64x196x1536xf32) + gelu_7 = paddle._C_ops.gelu(add_54, False) + + # pd_op.matmul: (64x196x384xf32) <- (64x196x1536xf32, 1536x384xf32) + matmul_41 = paddle._C_ops.matmul(gelu_7, parameter_200, False, False) + del parameter_200 + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 384xf32) + add_55 = paddle._C_ops.add(matmul_41, parameter_199) + del parameter_199 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_13 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_187 = paddle._C_ops.add(full_8, uniform_13) + del uniform_13 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_13 = paddle._C_ops.floor(add_187) + del add_187 + + # pd_op.divide: (64x196x384xf32) <- (64x196x384xf32, xf32) + divide_13 = paddle._C_ops.divide(add_55, full_8) + + # pd_op.multiply: (64x196x384xf32) <- (64x196x384xf32, 64x1x1xf32) + multiply_13 = paddle._C_ops.multiply(divide_13, floor_13) + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 64x196x384xf32) + add_56 = paddle._C_ops.add(add_53, multiply_13) + + # pd_op.layer_norm: (64x196x384xf32, 64x196xf32, 64x196xf32) <- (64x196x384xf32, 384xf32, 384xf32) + layer_norm_57, layer_norm_58, layer_norm_59 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_56, parameter_198, parameter_197, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_197, parameter_198 + + # pd_op.reshape: (64x14x14x384xf32) <- (64x196x384xf32, 4xi64) + reshape_73 = paddle._C_ops.reshape(layer_norm_57, full_int_array_55) + + # pd_op.reshape: (64x2x7x2x7x384xf32) <- (64x14x14x384xf32, 6xi64) + reshape_261 = paddle._C_ops.reshape(reshape_73, full_int_array_56) + + # pd_op.transpose: (64x2x2x7x7x384xf32) <- (64x2x7x2x7x384xf32) + transpose_49 = paddle._C_ops.transpose(reshape_261, [0, 1, 3, 2, 4, 5]) + del reshape_261 + + # pd_op.reshape: (256x7x7x384xf32) <- (64x2x2x7x7x384xf32, 4xi64) + reshape_74 = paddle._C_ops.reshape(transpose_49, full_int_array_57) + + # pd_op.reshape: (256x49x384xf32) <- (256x7x7x384xf32, 3xi64) + reshape_75 = paddle._C_ops.reshape(reshape_74, full_int_array_58) + + # pd_op.matmul: (256x49x1152xf32) <- (256x49x384xf32, 384x1152xf32) + matmul_42 = paddle._C_ops.matmul(reshape_75, parameter_196, False, False) + del parameter_196 + + # pd_op.add: (256x49x1152xf32) <- (256x49x1152xf32, 1152xf32) + add_57 = paddle._C_ops.add(matmul_42, parameter_195) + del parameter_195 + + # pd_op.reshape: (256x49x3x12x32xf32) <- (256x49x1152xf32, 5xi64) + reshape_262 = paddle._C_ops.reshape(add_57, full_int_array_59) + + # pd_op.transpose: (3x256x12x49x32xf32) <- (256x49x3x12x32xf32) + transpose_50 = paddle._C_ops.transpose(reshape_262, [2, 0, 3, 1, 4]) + del reshape_262 + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_40 = paddle._C_ops.slice( + transpose_50, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_41 = paddle._C_ops.slice( + transpose_50, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_8 = paddle._C_ops.slice( + transpose_50, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (256x12x49x32xf32) <- (256x12x49x32xf32, 1xf32) + scale_8 = paddle._C_ops.scale(slice_40, full_0, float("0"), True) + del slice_40 + + # pd_op.transpose: (256x12x32x49xf32) <- (256x12x49x32xf32) + transpose_51 = paddle._C_ops.transpose(slice_41, [0, 1, 3, 2]) + del slice_41 + + # pd_op.matmul: (256x12x49x49xf32) <- (256x12x49x32xf32, 256x12x32x49xf32) + matmul_43 = paddle._C_ops.matmul(scale_8, transpose_51, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_76 = paddle._C_ops.reshape(data_35, full_int_array_15) + del data_35 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_8 = paddle._C_ops.index_select(data_36, reshape_76, 0) + del data_36 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_263 = paddle._C_ops.reshape(index_select_8, full_int_array_16) + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_52 = paddle._C_ops.transpose(reshape_263, [2, 0, 1]) + del reshape_263 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_12 = paddle._C_ops.unsqueeze(transpose_52, full_int_array_0) + + # pd_op.add: (256x12x49x49xf32) <- (256x12x49x49xf32, 1x12x49x49xf32) + add_188 = paddle._C_ops.add(matmul_43, unsqueeze_12) + + # pd_op.softmax: (256x12x49x49xf32) <- (256x12x49x49xf32) + softmax_8 = paddle._C_ops.softmax(add_188, -1) + del add_188 + + # pd_op.matmul: (256x12x49x32xf32) <- (256x12x49x49xf32, 256x12x49x32xf32) + matmul_132 = paddle._C_ops.matmul(softmax_8, slice_8, False, False) + + # pd_op.transpose: (256x49x12x32xf32) <- (256x12x49x32xf32) + transpose_53 = paddle._C_ops.transpose(matmul_132, [0, 2, 1, 3]) + del matmul_132 + + # pd_op.reshape: (256x49x384xf32) <- (256x49x12x32xf32, 3xi64) + reshape_77 = paddle._C_ops.reshape(transpose_53, full_int_array_60) + + # pd_op.matmul: (256x49x384xf32) <- (256x49x384xf32, 384x384xf32) + matmul_44 = paddle._C_ops.matmul(reshape_77, parameter_194, False, False) + del parameter_194 + + # pd_op.add: (256x49x384xf32) <- (256x49x384xf32, 384xf32) + add_58 = paddle._C_ops.add(matmul_44, parameter_193) + del parameter_193 + + # pd_op.reshape: (256x7x7x384xf32) <- (256x49x384xf32, 4xi64) + reshape_78 = paddle._C_ops.reshape(add_58, full_int_array_57) + + # pd_op.reshape: (64x2x2x7x7x384xf32) <- (256x7x7x384xf32, 6xi64) + reshape_264 = paddle._C_ops.reshape(reshape_78, full_int_array_61) + + # pd_op.transpose: (64x2x7x2x7x384xf32) <- (64x2x2x7x7x384xf32) + transpose_54 = paddle._C_ops.transpose(reshape_264, [0, 1, 3, 2, 4, 5]) + del reshape_264 + + # pd_op.reshape: (64x14x14x384xf32) <- (64x2x7x2x7x384xf32, 4xi64) + reshape_79 = paddle._C_ops.reshape(transpose_54, full_int_array_62) + + # pd_op.reshape: (64x196x384xf32) <- (64x14x14x384xf32, 3xi64) + reshape_80 = paddle._C_ops.reshape(reshape_79, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_9 = paddle._C_ops.full( + [], + float("0.895652"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_102 = full_9 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_14 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_189 = paddle._C_ops.add(full_9, uniform_14) + del uniform_14 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_14 = paddle._C_ops.floor(add_189) + del add_189 + + # pd_op.divide: (64x196x384xf32) <- (64x196x384xf32, xf32) + divide_14 = paddle._C_ops.divide(reshape_80, full_9) + + # pd_op.multiply: (64x196x384xf32) <- (64x196x384xf32, 64x1x1xf32) + multiply_14 = paddle._C_ops.multiply(divide_14, floor_14) + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 64x196x384xf32) + add_59 = paddle._C_ops.add(add_56, multiply_14) + + # pd_op.layer_norm: (64x196x384xf32, 64x196xf32, 64x196xf32) <- (64x196x384xf32, 384xf32, 384xf32) + layer_norm_60, layer_norm_61, layer_norm_62 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_59, parameter_192, parameter_191, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_191, parameter_192 + + # pd_op.matmul: (64x196x1536xf32) <- (64x196x384xf32, 384x1536xf32) + matmul_45 = paddle._C_ops.matmul(layer_norm_60, parameter_190, False, False) + del parameter_190 + + # pd_op.add: (64x196x1536xf32) <- (64x196x1536xf32, 1536xf32) + add_60 = paddle._C_ops.add(matmul_45, parameter_189) + del parameter_189 + + # pd_op.gelu: (64x196x1536xf32) <- (64x196x1536xf32) + gelu_8 = paddle._C_ops.gelu(add_60, False) + + # pd_op.matmul: (64x196x384xf32) <- (64x196x1536xf32, 1536x384xf32) + matmul_46 = paddle._C_ops.matmul(gelu_8, parameter_188, False, False) + del parameter_188 + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 384xf32) + add_61 = paddle._C_ops.add(matmul_46, parameter_187) + del parameter_187 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_15 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_190 = paddle._C_ops.add(full_9, uniform_15) + del uniform_15 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_15 = paddle._C_ops.floor(add_190) + del add_190 + + # pd_op.divide: (64x196x384xf32) <- (64x196x384xf32, xf32) + divide_15 = paddle._C_ops.divide(add_61, full_9) + + # pd_op.multiply: (64x196x384xf32) <- (64x196x384xf32, 64x1x1xf32) + multiply_15 = paddle._C_ops.multiply(divide_15, floor_15) + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 64x196x384xf32) + add_62 = paddle._C_ops.add(add_59, multiply_15) + + # pd_op.layer_norm: (64x196x384xf32, 64x196xf32, 64x196xf32) <- (64x196x384xf32, 384xf32, 384xf32) + layer_norm_63, layer_norm_64, layer_norm_65 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_62, parameter_186, parameter_185, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_185, parameter_186 + + # pd_op.reshape: (64x14x14x384xf32) <- (64x196x384xf32, 4xi64) + reshape_81 = paddle._C_ops.reshape(layer_norm_63, full_int_array_55) + + # pd_op.roll: (64x14x14x384xf32) <- (64x14x14x384xf32, 2xi64) + roll_8 = paddle._C_ops.roll(reshape_81, full_int_array_4, [1, 2]) + + # pd_op.reshape: (64x2x7x2x7x384xf32) <- (64x14x14x384xf32, 6xi64) + reshape_265 = paddle._C_ops.reshape(roll_8, full_int_array_56) + + # pd_op.transpose: (64x2x2x7x7x384xf32) <- (64x2x7x2x7x384xf32) + transpose_55 = paddle._C_ops.transpose(reshape_265, [0, 1, 3, 2, 4, 5]) + del reshape_265 + + # pd_op.reshape: (256x7x7x384xf32) <- (64x2x2x7x7x384xf32, 4xi64) + reshape_82 = paddle._C_ops.reshape(transpose_55, full_int_array_57) + + # pd_op.reshape: (256x49x384xf32) <- (256x7x7x384xf32, 3xi64) + reshape_83 = paddle._C_ops.reshape(reshape_82, full_int_array_58) + + # pd_op.full: (1x14x14x1xf32) <- () + full_38 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__44 = paddle._C_ops.set_value_( + full_38, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_38 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__45 = paddle._C_ops.set_value_( + set_value__44, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__44 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__46 = paddle._C_ops.set_value_( + set_value__45, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__45 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__47 = paddle._C_ops.set_value_( + set_value__46, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__46 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__48 = paddle._C_ops.set_value_( + set_value__47, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__47 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__49 = paddle._C_ops.set_value_( + set_value__48, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__48 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__50 = paddle._C_ops.set_value_( + set_value__49, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__49 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__51 = paddle._C_ops.set_value_( + set_value__50, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__50 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__4 = paddle._C_ops.set_value_( + set_value__51, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__51 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_266 = paddle._C_ops.reshape(set_value__4, full_int_array_64) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_150 = paddle._C_ops.transpose(reshape_266, [0, 1, 3, 2, 4, 5]) + del reshape_266 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_267 = paddle._C_ops.reshape(transpose_150, full_int_array_36) + del transpose_150 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_268 = paddle._C_ops.reshape(reshape_267, full_int_array_37) + del reshape_267 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_49 = paddle._C_ops.unsqueeze(reshape_268, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_50 = paddle._C_ops.unsqueeze(reshape_268, full_int_array_2) + del reshape_268 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_4 = paddle._C_ops.subtract(unsqueeze_49, unsqueeze_50) + del unsqueeze_49, unsqueeze_50 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_4 = paddle._C_ops.not_equal(subtract_4, full_26) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_8 = paddle._C_ops.where(not_equal_4, full_35, subtract_4) + del not_equal_4, subtract_4 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_4 = paddle._C_ops.equal(where_8, full_26) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_9 = paddle._C_ops.where(equal_4, full_36, where_8) + del equal_4, where_8 + + # pd_op.matmul: (256x49x1152xf32) <- (256x49x384xf32, 384x1152xf32) + matmul_47 = paddle._C_ops.matmul(reshape_83, parameter_184, False, False) + del parameter_184 + + # pd_op.add: (256x49x1152xf32) <- (256x49x1152xf32, 1152xf32) + add_63 = paddle._C_ops.add(matmul_47, parameter_183) + del parameter_183 + + # pd_op.reshape: (256x49x3x12x32xf32) <- (256x49x1152xf32, 5xi64) + reshape_269 = paddle._C_ops.reshape(add_63, full_int_array_59) + + # pd_op.transpose: (3x256x12x49x32xf32) <- (256x49x3x12x32xf32) + transpose_56 = paddle._C_ops.transpose(reshape_269, [2, 0, 3, 1, 4]) + del reshape_269 + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_42 = paddle._C_ops.slice( + transpose_56, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_43 = paddle._C_ops.slice( + transpose_56, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_9 = paddle._C_ops.slice( + transpose_56, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (256x12x49x32xf32) <- (256x12x49x32xf32, 1xf32) + scale_9 = paddle._C_ops.scale(slice_42, full_0, float("0"), True) + del slice_42 + + # pd_op.transpose: (256x12x32x49xf32) <- (256x12x49x32xf32) + transpose_57 = paddle._C_ops.transpose(slice_43, [0, 1, 3, 2]) + del slice_43 + + # pd_op.matmul: (256x12x49x49xf32) <- (256x12x49x32xf32, 256x12x32x49xf32) + matmul_48 = paddle._C_ops.matmul(scale_9, transpose_57, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_84 = paddle._C_ops.reshape(data_37, full_int_array_15) + del data_37 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_9 = paddle._C_ops.index_select(data_38, reshape_84, 0) + del data_38 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_270 = paddle._C_ops.reshape(index_select_9, full_int_array_16) + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_58 = paddle._C_ops.transpose(reshape_270, [2, 0, 1]) + del reshape_270 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_13 = paddle._C_ops.unsqueeze(transpose_58, full_int_array_0) + + # pd_op.add: (256x12x49x49xf32) <- (256x12x49x49xf32, 1x12x49x49xf32) + add_64 = paddle._C_ops.add(matmul_48, unsqueeze_13) + + # pd_op.reshape: (64x4x12x49x49xf32) <- (256x12x49x49xf32, 5xi64) + reshape_85 = paddle._C_ops.reshape(add_64, full_int_array_65) + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_51 = paddle._C_ops.unsqueeze(where_9, full_int_array_1) + del where_9 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_14 = paddle._C_ops.unsqueeze(unsqueeze_51, full_int_array_0) + del unsqueeze_51 + + # pd_op.add: (64x4x12x49x49xf32) <- (64x4x12x49x49xf32, 1x4x1x49x49xf32) + add_65 = paddle._C_ops.add(reshape_85, unsqueeze_14) + + # pd_op.reshape: (256x12x49x49xf32) <- (64x4x12x49x49xf32, 4xi64) + reshape_271 = paddle._C_ops.reshape(add_65, full_int_array_66) + + # pd_op.softmax: (256x12x49x49xf32) <- (256x12x49x49xf32) + softmax_9 = paddle._C_ops.softmax(reshape_271, -1) + del reshape_271 + + # pd_op.matmul: (256x12x49x32xf32) <- (256x12x49x49xf32, 256x12x49x32xf32) + matmul_133 = paddle._C_ops.matmul(softmax_9, slice_9, False, False) + + # pd_op.transpose: (256x49x12x32xf32) <- (256x12x49x32xf32) + transpose_59 = paddle._C_ops.transpose(matmul_133, [0, 2, 1, 3]) + del matmul_133 + + # pd_op.reshape: (256x49x384xf32) <- (256x49x12x32xf32, 3xi64) + reshape_86 = paddle._C_ops.reshape(transpose_59, full_int_array_60) + + # pd_op.matmul: (256x49x384xf32) <- (256x49x384xf32, 384x384xf32) + matmul_49 = paddle._C_ops.matmul(reshape_86, parameter_182, False, False) + del parameter_182 + + # pd_op.add: (256x49x384xf32) <- (256x49x384xf32, 384xf32) + add_66 = paddle._C_ops.add(matmul_49, parameter_181) + del parameter_181 + + # pd_op.reshape: (256x7x7x384xf32) <- (256x49x384xf32, 4xi64) + reshape_87 = paddle._C_ops.reshape(add_66, full_int_array_57) + + # pd_op.reshape: (64x2x2x7x7x384xf32) <- (256x7x7x384xf32, 6xi64) + reshape_272 = paddle._C_ops.reshape(reshape_87, full_int_array_61) + + # pd_op.transpose: (64x2x7x2x7x384xf32) <- (64x2x2x7x7x384xf32) + transpose_60 = paddle._C_ops.transpose(reshape_272, [0, 1, 3, 2, 4, 5]) + del reshape_272 + + # pd_op.reshape: (64x14x14x384xf32) <- (64x2x7x2x7x384xf32, 4xi64) + reshape_88 = paddle._C_ops.reshape(transpose_60, full_int_array_62) + + # pd_op.roll: (64x14x14x384xf32) <- (64x14x14x384xf32, 2xi64) + roll_9 = paddle._C_ops.roll(reshape_88, full_int_array_5, [1, 2]) + + # pd_op.reshape: (64x196x384xf32) <- (64x14x14x384xf32, 3xi64) + reshape_89 = paddle._C_ops.reshape(roll_9, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_10 = paddle._C_ops.full( + [], + float("0.882609"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_113 = full_10 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_16 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_191 = paddle._C_ops.add(full_10, uniform_16) + del uniform_16 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_16 = paddle._C_ops.floor(add_191) + del add_191 + + # pd_op.divide: (64x196x384xf32) <- (64x196x384xf32, xf32) + divide_16 = paddle._C_ops.divide(reshape_89, full_10) + + # pd_op.multiply: (64x196x384xf32) <- (64x196x384xf32, 64x1x1xf32) + multiply_16 = paddle._C_ops.multiply(divide_16, floor_16) + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 64x196x384xf32) + add_67 = paddle._C_ops.add(add_62, multiply_16) + + # pd_op.layer_norm: (64x196x384xf32, 64x196xf32, 64x196xf32) <- (64x196x384xf32, 384xf32, 384xf32) + layer_norm_66, layer_norm_67, layer_norm_68 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_67, parameter_180, parameter_179, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_179, parameter_180 + + # pd_op.matmul: (64x196x1536xf32) <- (64x196x384xf32, 384x1536xf32) + matmul_50 = paddle._C_ops.matmul(layer_norm_66, parameter_178, False, False) + del parameter_178 + + # pd_op.add: (64x196x1536xf32) <- (64x196x1536xf32, 1536xf32) + add_68 = paddle._C_ops.add(matmul_50, parameter_177) + del parameter_177 + + # pd_op.gelu: (64x196x1536xf32) <- (64x196x1536xf32) + gelu_9 = paddle._C_ops.gelu(add_68, False) + + # pd_op.matmul: (64x196x384xf32) <- (64x196x1536xf32, 1536x384xf32) + matmul_51 = paddle._C_ops.matmul(gelu_9, parameter_176, False, False) + del parameter_176 + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 384xf32) + add_69 = paddle._C_ops.add(matmul_51, parameter_175) + del parameter_175 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_17 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_192 = paddle._C_ops.add(full_10, uniform_17) + del uniform_17 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_17 = paddle._C_ops.floor(add_192) + del add_192 + + # pd_op.divide: (64x196x384xf32) <- (64x196x384xf32, xf32) + divide_17 = paddle._C_ops.divide(add_69, full_10) + + # pd_op.multiply: (64x196x384xf32) <- (64x196x384xf32, 64x1x1xf32) + multiply_17 = paddle._C_ops.multiply(divide_17, floor_17) + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 64x196x384xf32) + add_70 = paddle._C_ops.add(add_67, multiply_17) + + # pd_op.layer_norm: (64x196x384xf32, 64x196xf32, 64x196xf32) <- (64x196x384xf32, 384xf32, 384xf32) + layer_norm_69, layer_norm_70, layer_norm_71 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_70, parameter_174, parameter_173, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_173, parameter_174 + + # pd_op.reshape: (64x14x14x384xf32) <- (64x196x384xf32, 4xi64) + reshape_90 = paddle._C_ops.reshape(layer_norm_69, full_int_array_55) + + # pd_op.reshape: (64x2x7x2x7x384xf32) <- (64x14x14x384xf32, 6xi64) + reshape_273 = paddle._C_ops.reshape(reshape_90, full_int_array_56) + + # pd_op.transpose: (64x2x2x7x7x384xf32) <- (64x2x7x2x7x384xf32) + transpose_61 = paddle._C_ops.transpose(reshape_273, [0, 1, 3, 2, 4, 5]) + del reshape_273 + + # pd_op.reshape: (256x7x7x384xf32) <- (64x2x2x7x7x384xf32, 4xi64) + reshape_91 = paddle._C_ops.reshape(transpose_61, full_int_array_57) + + # pd_op.reshape: (256x49x384xf32) <- (256x7x7x384xf32, 3xi64) + reshape_92 = paddle._C_ops.reshape(reshape_91, full_int_array_58) + + # pd_op.matmul: (256x49x1152xf32) <- (256x49x384xf32, 384x1152xf32) + matmul_52 = paddle._C_ops.matmul(reshape_92, parameter_172, False, False) + del parameter_172 + + # pd_op.add: (256x49x1152xf32) <- (256x49x1152xf32, 1152xf32) + add_71 = paddle._C_ops.add(matmul_52, parameter_171) + del parameter_171 + + # pd_op.reshape: (256x49x3x12x32xf32) <- (256x49x1152xf32, 5xi64) + reshape_274 = paddle._C_ops.reshape(add_71, full_int_array_59) + + # pd_op.transpose: (3x256x12x49x32xf32) <- (256x49x3x12x32xf32) + transpose_62 = paddle._C_ops.transpose(reshape_274, [2, 0, 3, 1, 4]) + del reshape_274 + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_44 = paddle._C_ops.slice( + transpose_62, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_45 = paddle._C_ops.slice( + transpose_62, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_10 = paddle._C_ops.slice( + transpose_62, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (256x12x49x32xf32) <- (256x12x49x32xf32, 1xf32) + scale_10 = paddle._C_ops.scale(slice_44, full_0, float("0"), True) + del slice_44 + + # pd_op.transpose: (256x12x32x49xf32) <- (256x12x49x32xf32) + transpose_63 = paddle._C_ops.transpose(slice_45, [0, 1, 3, 2]) + del slice_45 + + # pd_op.matmul: (256x12x49x49xf32) <- (256x12x49x32xf32, 256x12x32x49xf32) + matmul_53 = paddle._C_ops.matmul(scale_10, transpose_63, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_93 = paddle._C_ops.reshape(data_39, full_int_array_15) + del data_39 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_10 = paddle._C_ops.index_select(data_40, reshape_93, 0) + del data_40 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_275 = paddle._C_ops.reshape(index_select_10, full_int_array_16) + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_64 = paddle._C_ops.transpose(reshape_275, [2, 0, 1]) + del reshape_275 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_15 = paddle._C_ops.unsqueeze(transpose_64, full_int_array_0) + + # pd_op.add: (256x12x49x49xf32) <- (256x12x49x49xf32, 1x12x49x49xf32) + add_193 = paddle._C_ops.add(matmul_53, unsqueeze_15) + + # pd_op.softmax: (256x12x49x49xf32) <- (256x12x49x49xf32) + softmax_10 = paddle._C_ops.softmax(add_193, -1) + del add_193 + + # pd_op.matmul: (256x12x49x32xf32) <- (256x12x49x49xf32, 256x12x49x32xf32) + matmul_134 = paddle._C_ops.matmul(softmax_10, slice_10, False, False) + + # pd_op.transpose: (256x49x12x32xf32) <- (256x12x49x32xf32) + transpose_65 = paddle._C_ops.transpose(matmul_134, [0, 2, 1, 3]) + del matmul_134 + + # pd_op.reshape: (256x49x384xf32) <- (256x49x12x32xf32, 3xi64) + reshape_94 = paddle._C_ops.reshape(transpose_65, full_int_array_60) + + # pd_op.matmul: (256x49x384xf32) <- (256x49x384xf32, 384x384xf32) + matmul_54 = paddle._C_ops.matmul(reshape_94, parameter_170, False, False) + del parameter_170 + + # pd_op.add: (256x49x384xf32) <- (256x49x384xf32, 384xf32) + add_72 = paddle._C_ops.add(matmul_54, parameter_169) + del parameter_169 + + # pd_op.reshape: (256x7x7x384xf32) <- (256x49x384xf32, 4xi64) + reshape_95 = paddle._C_ops.reshape(add_72, full_int_array_57) + + # pd_op.reshape: (64x2x2x7x7x384xf32) <- (256x7x7x384xf32, 6xi64) + reshape_276 = paddle._C_ops.reshape(reshape_95, full_int_array_61) + + # pd_op.transpose: (64x2x7x2x7x384xf32) <- (64x2x2x7x7x384xf32) + transpose_66 = paddle._C_ops.transpose(reshape_276, [0, 1, 3, 2, 4, 5]) + del reshape_276 + + # pd_op.reshape: (64x14x14x384xf32) <- (64x2x7x2x7x384xf32, 4xi64) + reshape_96 = paddle._C_ops.reshape(transpose_66, full_int_array_62) + + # pd_op.reshape: (64x196x384xf32) <- (64x14x14x384xf32, 3xi64) + reshape_97 = paddle._C_ops.reshape(reshape_96, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_11 = paddle._C_ops.full( + [], + float("0.869565"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_122 = full_11 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_18 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_194 = paddle._C_ops.add(full_11, uniform_18) + del uniform_18 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_18 = paddle._C_ops.floor(add_194) + del add_194 + + # pd_op.divide: (64x196x384xf32) <- (64x196x384xf32, xf32) + divide_18 = paddle._C_ops.divide(reshape_97, full_11) + + # pd_op.multiply: (64x196x384xf32) <- (64x196x384xf32, 64x1x1xf32) + multiply_18 = paddle._C_ops.multiply(divide_18, floor_18) + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 64x196x384xf32) + add_73 = paddle._C_ops.add(add_70, multiply_18) + + # pd_op.layer_norm: (64x196x384xf32, 64x196xf32, 64x196xf32) <- (64x196x384xf32, 384xf32, 384xf32) + layer_norm_72, layer_norm_73, layer_norm_74 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_73, parameter_168, parameter_167, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_167, parameter_168 + + # pd_op.matmul: (64x196x1536xf32) <- (64x196x384xf32, 384x1536xf32) + matmul_55 = paddle._C_ops.matmul(layer_norm_72, parameter_166, False, False) + del parameter_166 + + # pd_op.add: (64x196x1536xf32) <- (64x196x1536xf32, 1536xf32) + add_74 = paddle._C_ops.add(matmul_55, parameter_165) + del parameter_165 + + # pd_op.gelu: (64x196x1536xf32) <- (64x196x1536xf32) + gelu_10 = paddle._C_ops.gelu(add_74, False) + + # pd_op.matmul: (64x196x384xf32) <- (64x196x1536xf32, 1536x384xf32) + matmul_56 = paddle._C_ops.matmul(gelu_10, parameter_164, False, False) + del parameter_164 + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 384xf32) + add_75 = paddle._C_ops.add(matmul_56, parameter_163) + del parameter_163 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_19 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_195 = paddle._C_ops.add(full_11, uniform_19) + del uniform_19 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_19 = paddle._C_ops.floor(add_195) + del add_195 + + # pd_op.divide: (64x196x384xf32) <- (64x196x384xf32, xf32) + divide_19 = paddle._C_ops.divide(add_75, full_11) + + # pd_op.multiply: (64x196x384xf32) <- (64x196x384xf32, 64x1x1xf32) + multiply_19 = paddle._C_ops.multiply(divide_19, floor_19) + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 64x196x384xf32) + add_76 = paddle._C_ops.add(add_73, multiply_19) + + # pd_op.layer_norm: (64x196x384xf32, 64x196xf32, 64x196xf32) <- (64x196x384xf32, 384xf32, 384xf32) + layer_norm_75, layer_norm_76, layer_norm_77 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_76, parameter_162, parameter_161, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_161, parameter_162 + + # pd_op.reshape: (64x14x14x384xf32) <- (64x196x384xf32, 4xi64) + reshape_98 = paddle._C_ops.reshape(layer_norm_75, full_int_array_55) + + # pd_op.roll: (64x14x14x384xf32) <- (64x14x14x384xf32, 2xi64) + roll_10 = paddle._C_ops.roll(reshape_98, full_int_array_4, [1, 2]) + + # pd_op.reshape: (64x2x7x2x7x384xf32) <- (64x14x14x384xf32, 6xi64) + reshape_277 = paddle._C_ops.reshape(roll_10, full_int_array_56) + + # pd_op.transpose: (64x2x2x7x7x384xf32) <- (64x2x7x2x7x384xf32) + transpose_67 = paddle._C_ops.transpose(reshape_277, [0, 1, 3, 2, 4, 5]) + del reshape_277 + + # pd_op.reshape: (256x7x7x384xf32) <- (64x2x2x7x7x384xf32, 4xi64) + reshape_99 = paddle._C_ops.reshape(transpose_67, full_int_array_57) + + # pd_op.reshape: (256x49x384xf32) <- (256x7x7x384xf32, 3xi64) + reshape_100 = paddle._C_ops.reshape(reshape_99, full_int_array_58) + + # pd_op.full: (1x14x14x1xf32) <- () + full_39 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__52 = paddle._C_ops.set_value_( + full_39, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_39 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__53 = paddle._C_ops.set_value_( + set_value__52, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__52 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__54 = paddle._C_ops.set_value_( + set_value__53, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__53 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__55 = paddle._C_ops.set_value_( + set_value__54, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__54 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__56 = paddle._C_ops.set_value_( + set_value__55, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__55 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__57 = paddle._C_ops.set_value_( + set_value__56, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__56 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__58 = paddle._C_ops.set_value_( + set_value__57, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__57 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__59 = paddle._C_ops.set_value_( + set_value__58, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__58 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__5 = paddle._C_ops.set_value_( + set_value__59, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__59 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_278 = paddle._C_ops.reshape(set_value__5, full_int_array_64) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_151 = paddle._C_ops.transpose(reshape_278, [0, 1, 3, 2, 4, 5]) + del reshape_278 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_279 = paddle._C_ops.reshape(transpose_151, full_int_array_36) + del transpose_151 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_280 = paddle._C_ops.reshape(reshape_279, full_int_array_37) + del reshape_279 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_52 = paddle._C_ops.unsqueeze(reshape_280, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_53 = paddle._C_ops.unsqueeze(reshape_280, full_int_array_2) + del reshape_280 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_5 = paddle._C_ops.subtract(unsqueeze_52, unsqueeze_53) + del unsqueeze_52, unsqueeze_53 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_5 = paddle._C_ops.not_equal(subtract_5, full_26) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_10 = paddle._C_ops.where(not_equal_5, full_35, subtract_5) + del not_equal_5, subtract_5 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_5 = paddle._C_ops.equal(where_10, full_26) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_11 = paddle._C_ops.where(equal_5, full_36, where_10) + del equal_5, where_10 + + # pd_op.matmul: (256x49x1152xf32) <- (256x49x384xf32, 384x1152xf32) + matmul_57 = paddle._C_ops.matmul(reshape_100, parameter_160, False, False) + del parameter_160 + + # pd_op.add: (256x49x1152xf32) <- (256x49x1152xf32, 1152xf32) + add_77 = paddle._C_ops.add(matmul_57, parameter_159) + del parameter_159 + + # pd_op.reshape: (256x49x3x12x32xf32) <- (256x49x1152xf32, 5xi64) + reshape_281 = paddle._C_ops.reshape(add_77, full_int_array_59) + + # pd_op.transpose: (3x256x12x49x32xf32) <- (256x49x3x12x32xf32) + transpose_68 = paddle._C_ops.transpose(reshape_281, [2, 0, 3, 1, 4]) + del reshape_281 + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_46 = paddle._C_ops.slice( + transpose_68, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_47 = paddle._C_ops.slice( + transpose_68, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_11 = paddle._C_ops.slice( + transpose_68, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (256x12x49x32xf32) <- (256x12x49x32xf32, 1xf32) + scale_11 = paddle._C_ops.scale(slice_46, full_0, float("0"), True) + del slice_46 + + # pd_op.transpose: (256x12x32x49xf32) <- (256x12x49x32xf32) + transpose_69 = paddle._C_ops.transpose(slice_47, [0, 1, 3, 2]) + del slice_47 + + # pd_op.matmul: (256x12x49x49xf32) <- (256x12x49x32xf32, 256x12x32x49xf32) + matmul_58 = paddle._C_ops.matmul(scale_11, transpose_69, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_101 = paddle._C_ops.reshape(data_41, full_int_array_15) + del data_41 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_11 = paddle._C_ops.index_select(data_42, reshape_101, 0) + del data_42 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_282 = paddle._C_ops.reshape(index_select_11, full_int_array_16) + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_70 = paddle._C_ops.transpose(reshape_282, [2, 0, 1]) + del reshape_282 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_16 = paddle._C_ops.unsqueeze(transpose_70, full_int_array_0) + + # pd_op.add: (256x12x49x49xf32) <- (256x12x49x49xf32, 1x12x49x49xf32) + add_78 = paddle._C_ops.add(matmul_58, unsqueeze_16) + + # pd_op.reshape: (64x4x12x49x49xf32) <- (256x12x49x49xf32, 5xi64) + reshape_102 = paddle._C_ops.reshape(add_78, full_int_array_65) + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_54 = paddle._C_ops.unsqueeze(where_11, full_int_array_1) + del where_11 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_17 = paddle._C_ops.unsqueeze(unsqueeze_54, full_int_array_0) + del unsqueeze_54 + + # pd_op.add: (64x4x12x49x49xf32) <- (64x4x12x49x49xf32, 1x4x1x49x49xf32) + add_79 = paddle._C_ops.add(reshape_102, unsqueeze_17) + + # pd_op.reshape: (256x12x49x49xf32) <- (64x4x12x49x49xf32, 4xi64) + reshape_283 = paddle._C_ops.reshape(add_79, full_int_array_66) + + # pd_op.softmax: (256x12x49x49xf32) <- (256x12x49x49xf32) + softmax_11 = paddle._C_ops.softmax(reshape_283, -1) + del reshape_283 + + # pd_op.matmul: (256x12x49x32xf32) <- (256x12x49x49xf32, 256x12x49x32xf32) + matmul_135 = paddle._C_ops.matmul(softmax_11, slice_11, False, False) + + # pd_op.transpose: (256x49x12x32xf32) <- (256x12x49x32xf32) + transpose_71 = paddle._C_ops.transpose(matmul_135, [0, 2, 1, 3]) + del matmul_135 + + # pd_op.reshape: (256x49x384xf32) <- (256x49x12x32xf32, 3xi64) + reshape_103 = paddle._C_ops.reshape(transpose_71, full_int_array_60) + + # pd_op.matmul: (256x49x384xf32) <- (256x49x384xf32, 384x384xf32) + matmul_59 = paddle._C_ops.matmul(reshape_103, parameter_158, False, False) + del parameter_158 + + # pd_op.add: (256x49x384xf32) <- (256x49x384xf32, 384xf32) + add_80 = paddle._C_ops.add(matmul_59, parameter_157) + del parameter_157 + + # pd_op.reshape: (256x7x7x384xf32) <- (256x49x384xf32, 4xi64) + reshape_104 = paddle._C_ops.reshape(add_80, full_int_array_57) + + # pd_op.reshape: (64x2x2x7x7x384xf32) <- (256x7x7x384xf32, 6xi64) + reshape_284 = paddle._C_ops.reshape(reshape_104, full_int_array_61) + + # pd_op.transpose: (64x2x7x2x7x384xf32) <- (64x2x2x7x7x384xf32) + transpose_72 = paddle._C_ops.transpose(reshape_284, [0, 1, 3, 2, 4, 5]) + del reshape_284 + + # pd_op.reshape: (64x14x14x384xf32) <- (64x2x7x2x7x384xf32, 4xi64) + reshape_105 = paddle._C_ops.reshape(transpose_72, full_int_array_62) + + # pd_op.roll: (64x14x14x384xf32) <- (64x14x14x384xf32, 2xi64) + roll_11 = paddle._C_ops.roll(reshape_105, full_int_array_5, [1, 2]) + + # pd_op.reshape: (64x196x384xf32) <- (64x14x14x384xf32, 3xi64) + reshape_106 = paddle._C_ops.reshape(roll_11, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_12 = paddle._C_ops.full( + [], + float("0.856522"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_133 = full_12 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_20 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_196 = paddle._C_ops.add(full_12, uniform_20) + del uniform_20 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_20 = paddle._C_ops.floor(add_196) + del add_196 + + # pd_op.divide: (64x196x384xf32) <- (64x196x384xf32, xf32) + divide_20 = paddle._C_ops.divide(reshape_106, full_12) + + # pd_op.multiply: (64x196x384xf32) <- (64x196x384xf32, 64x1x1xf32) + multiply_20 = paddle._C_ops.multiply(divide_20, floor_20) + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 64x196x384xf32) + add_81 = paddle._C_ops.add(add_76, multiply_20) + + # pd_op.layer_norm: (64x196x384xf32, 64x196xf32, 64x196xf32) <- (64x196x384xf32, 384xf32, 384xf32) + layer_norm_78, layer_norm_79, layer_norm_80 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_81, parameter_156, parameter_155, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_155, parameter_156 + + # pd_op.matmul: (64x196x1536xf32) <- (64x196x384xf32, 384x1536xf32) + matmul_60 = paddle._C_ops.matmul(layer_norm_78, parameter_154, False, False) + del parameter_154 + + # pd_op.add: (64x196x1536xf32) <- (64x196x1536xf32, 1536xf32) + add_82 = paddle._C_ops.add(matmul_60, parameter_153) + del parameter_153 + + # pd_op.gelu: (64x196x1536xf32) <- (64x196x1536xf32) + gelu_11 = paddle._C_ops.gelu(add_82, False) + + # pd_op.matmul: (64x196x384xf32) <- (64x196x1536xf32, 1536x384xf32) + matmul_61 = paddle._C_ops.matmul(gelu_11, parameter_152, False, False) + del parameter_152 + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 384xf32) + add_83 = paddle._C_ops.add(matmul_61, parameter_151) + del parameter_151 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_21 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_197 = paddle._C_ops.add(full_12, uniform_21) + del uniform_21 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_21 = paddle._C_ops.floor(add_197) + del add_197 + + # pd_op.divide: (64x196x384xf32) <- (64x196x384xf32, xf32) + divide_21 = paddle._C_ops.divide(add_83, full_12) + + # pd_op.multiply: (64x196x384xf32) <- (64x196x384xf32, 64x1x1xf32) + multiply_21 = paddle._C_ops.multiply(divide_21, floor_21) + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 64x196x384xf32) + add_84 = paddle._C_ops.add(add_81, multiply_21) + + # pd_op.layer_norm: (64x196x384xf32, 64x196xf32, 64x196xf32) <- (64x196x384xf32, 384xf32, 384xf32) + layer_norm_81, layer_norm_82, layer_norm_83 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_84, parameter_150, parameter_149, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_149, parameter_150 + + # pd_op.reshape: (64x14x14x384xf32) <- (64x196x384xf32, 4xi64) + reshape_107 = paddle._C_ops.reshape(layer_norm_81, full_int_array_55) + + # pd_op.reshape: (64x2x7x2x7x384xf32) <- (64x14x14x384xf32, 6xi64) + reshape_285 = paddle._C_ops.reshape(reshape_107, full_int_array_56) + + # pd_op.transpose: (64x2x2x7x7x384xf32) <- (64x2x7x2x7x384xf32) + transpose_73 = paddle._C_ops.transpose(reshape_285, [0, 1, 3, 2, 4, 5]) + del reshape_285 + + # pd_op.reshape: (256x7x7x384xf32) <- (64x2x2x7x7x384xf32, 4xi64) + reshape_108 = paddle._C_ops.reshape(transpose_73, full_int_array_57) + + # pd_op.reshape: (256x49x384xf32) <- (256x7x7x384xf32, 3xi64) + reshape_109 = paddle._C_ops.reshape(reshape_108, full_int_array_58) + + # pd_op.matmul: (256x49x1152xf32) <- (256x49x384xf32, 384x1152xf32) + matmul_62 = paddle._C_ops.matmul(reshape_109, parameter_148, False, False) + del parameter_148 + + # pd_op.add: (256x49x1152xf32) <- (256x49x1152xf32, 1152xf32) + add_85 = paddle._C_ops.add(matmul_62, parameter_147) + del parameter_147 + + # pd_op.reshape: (256x49x3x12x32xf32) <- (256x49x1152xf32, 5xi64) + reshape_286 = paddle._C_ops.reshape(add_85, full_int_array_59) + + # pd_op.transpose: (3x256x12x49x32xf32) <- (256x49x3x12x32xf32) + transpose_74 = paddle._C_ops.transpose(reshape_286, [2, 0, 3, 1, 4]) + del reshape_286 + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_48 = paddle._C_ops.slice( + transpose_74, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_49 = paddle._C_ops.slice( + transpose_74, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_12 = paddle._C_ops.slice( + transpose_74, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (256x12x49x32xf32) <- (256x12x49x32xf32, 1xf32) + scale_12 = paddle._C_ops.scale(slice_48, full_0, float("0"), True) + del slice_48 + + # pd_op.transpose: (256x12x32x49xf32) <- (256x12x49x32xf32) + transpose_75 = paddle._C_ops.transpose(slice_49, [0, 1, 3, 2]) + del slice_49 + + # pd_op.matmul: (256x12x49x49xf32) <- (256x12x49x32xf32, 256x12x32x49xf32) + matmul_63 = paddle._C_ops.matmul(scale_12, transpose_75, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_110 = paddle._C_ops.reshape(data_44, full_int_array_15) + del data_44 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_12 = paddle._C_ops.index_select(data_45, reshape_110, 0) + del data_45 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_287 = paddle._C_ops.reshape(index_select_12, full_int_array_16) + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_76 = paddle._C_ops.transpose(reshape_287, [2, 0, 1]) + del reshape_287 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_18 = paddle._C_ops.unsqueeze(transpose_76, full_int_array_0) + + # pd_op.add: (256x12x49x49xf32) <- (256x12x49x49xf32, 1x12x49x49xf32) + add_198 = paddle._C_ops.add(matmul_63, unsqueeze_18) + + # pd_op.softmax: (256x12x49x49xf32) <- (256x12x49x49xf32) + softmax_12 = paddle._C_ops.softmax(add_198, -1) + del add_198 + + # pd_op.matmul: (256x12x49x32xf32) <- (256x12x49x49xf32, 256x12x49x32xf32) + matmul_136 = paddle._C_ops.matmul(softmax_12, slice_12, False, False) + + # pd_op.transpose: (256x49x12x32xf32) <- (256x12x49x32xf32) + transpose_77 = paddle._C_ops.transpose(matmul_136, [0, 2, 1, 3]) + del matmul_136 + + # pd_op.reshape: (256x49x384xf32) <- (256x49x12x32xf32, 3xi64) + reshape_111 = paddle._C_ops.reshape(transpose_77, full_int_array_60) + + # pd_op.matmul: (256x49x384xf32) <- (256x49x384xf32, 384x384xf32) + matmul_64 = paddle._C_ops.matmul(reshape_111, parameter_146, False, False) + del parameter_146 + + # pd_op.add: (256x49x384xf32) <- (256x49x384xf32, 384xf32) + add_86 = paddle._C_ops.add(matmul_64, parameter_145) + del parameter_145 + + # pd_op.reshape: (256x7x7x384xf32) <- (256x49x384xf32, 4xi64) + reshape_112 = paddle._C_ops.reshape(add_86, full_int_array_57) + + # pd_op.reshape: (64x2x2x7x7x384xf32) <- (256x7x7x384xf32, 6xi64) + reshape_288 = paddle._C_ops.reshape(reshape_112, full_int_array_61) + + # pd_op.transpose: (64x2x7x2x7x384xf32) <- (64x2x2x7x7x384xf32) + transpose_78 = paddle._C_ops.transpose(reshape_288, [0, 1, 3, 2, 4, 5]) + del reshape_288 + + # pd_op.reshape: (64x14x14x384xf32) <- (64x2x7x2x7x384xf32, 4xi64) + reshape_113 = paddle._C_ops.reshape(transpose_78, full_int_array_62) + + # pd_op.reshape: (64x196x384xf32) <- (64x14x14x384xf32, 3xi64) + reshape_114 = paddle._C_ops.reshape(reshape_113, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_13 = paddle._C_ops.full( + [], + float("0.843478"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_142 = full_13 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_22 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_199 = paddle._C_ops.add(full_13, uniform_22) + del uniform_22 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_22 = paddle._C_ops.floor(add_199) + del add_199 + + # pd_op.divide: (64x196x384xf32) <- (64x196x384xf32, xf32) + divide_22 = paddle._C_ops.divide(reshape_114, full_13) + + # pd_op.multiply: (64x196x384xf32) <- (64x196x384xf32, 64x1x1xf32) + multiply_22 = paddle._C_ops.multiply(divide_22, floor_22) + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 64x196x384xf32) + add_87 = paddle._C_ops.add(add_84, multiply_22) + + # pd_op.layer_norm: (64x196x384xf32, 64x196xf32, 64x196xf32) <- (64x196x384xf32, 384xf32, 384xf32) + layer_norm_84, layer_norm_85, layer_norm_86 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_87, parameter_144, parameter_143, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_143, parameter_144 + + # pd_op.matmul: (64x196x1536xf32) <- (64x196x384xf32, 384x1536xf32) + matmul_65 = paddle._C_ops.matmul(layer_norm_84, parameter_142, False, False) + del parameter_142 + + # pd_op.add: (64x196x1536xf32) <- (64x196x1536xf32, 1536xf32) + add_88 = paddle._C_ops.add(matmul_65, parameter_141) + del parameter_141 + + # pd_op.gelu: (64x196x1536xf32) <- (64x196x1536xf32) + gelu_12 = paddle._C_ops.gelu(add_88, False) + + # pd_op.matmul: (64x196x384xf32) <- (64x196x1536xf32, 1536x384xf32) + matmul_66 = paddle._C_ops.matmul(gelu_12, parameter_140, False, False) + del parameter_140 + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 384xf32) + add_89 = paddle._C_ops.add(matmul_66, parameter_139) + del parameter_139 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_23 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_200 = paddle._C_ops.add(full_13, uniform_23) + del uniform_23 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_23 = paddle._C_ops.floor(add_200) + del add_200 + + # pd_op.divide: (64x196x384xf32) <- (64x196x384xf32, xf32) + divide_23 = paddle._C_ops.divide(add_89, full_13) + + # pd_op.multiply: (64x196x384xf32) <- (64x196x384xf32, 64x1x1xf32) + multiply_23 = paddle._C_ops.multiply(divide_23, floor_23) + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 64x196x384xf32) + add_90 = paddle._C_ops.add(add_87, multiply_23) + + # pd_op.layer_norm: (64x196x384xf32, 64x196xf32, 64x196xf32) <- (64x196x384xf32, 384xf32, 384xf32) + layer_norm_87, layer_norm_88, layer_norm_89 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_90, parameter_138, parameter_137, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_137, parameter_138 + + # pd_op.reshape: (64x14x14x384xf32) <- (64x196x384xf32, 4xi64) + reshape_115 = paddle._C_ops.reshape(layer_norm_87, full_int_array_55) + + # pd_op.roll: (64x14x14x384xf32) <- (64x14x14x384xf32, 2xi64) + roll_12 = paddle._C_ops.roll(reshape_115, full_int_array_4, [1, 2]) + + # pd_op.reshape: (64x2x7x2x7x384xf32) <- (64x14x14x384xf32, 6xi64) + reshape_289 = paddle._C_ops.reshape(roll_12, full_int_array_56) + + # pd_op.transpose: (64x2x2x7x7x384xf32) <- (64x2x7x2x7x384xf32) + transpose_79 = paddle._C_ops.transpose(reshape_289, [0, 1, 3, 2, 4, 5]) + del reshape_289 + + # pd_op.reshape: (256x7x7x384xf32) <- (64x2x2x7x7x384xf32, 4xi64) + reshape_116 = paddle._C_ops.reshape(transpose_79, full_int_array_57) + + # pd_op.reshape: (256x49x384xf32) <- (256x7x7x384xf32, 3xi64) + reshape_117 = paddle._C_ops.reshape(reshape_116, full_int_array_58) + + # pd_op.full: (1x14x14x1xf32) <- () + full_40 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__60 = paddle._C_ops.set_value_( + full_40, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_40 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__61 = paddle._C_ops.set_value_( + set_value__60, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__60 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__62 = paddle._C_ops.set_value_( + set_value__61, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__61 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__63 = paddle._C_ops.set_value_( + set_value__62, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__62 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__64 = paddle._C_ops.set_value_( + set_value__63, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__63 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__65 = paddle._C_ops.set_value_( + set_value__64, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__64 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__66 = paddle._C_ops.set_value_( + set_value__65, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__65 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__67 = paddle._C_ops.set_value_( + set_value__66, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__66 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__6 = paddle._C_ops.set_value_( + set_value__67, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__67 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_290 = paddle._C_ops.reshape(set_value__6, full_int_array_64) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_152 = paddle._C_ops.transpose(reshape_290, [0, 1, 3, 2, 4, 5]) + del reshape_290 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_291 = paddle._C_ops.reshape(transpose_152, full_int_array_36) + del transpose_152 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_292 = paddle._C_ops.reshape(reshape_291, full_int_array_37) + del reshape_291 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_55 = paddle._C_ops.unsqueeze(reshape_292, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_56 = paddle._C_ops.unsqueeze(reshape_292, full_int_array_2) + del reshape_292 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_6 = paddle._C_ops.subtract(unsqueeze_55, unsqueeze_56) + del unsqueeze_55, unsqueeze_56 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_6 = paddle._C_ops.not_equal(subtract_6, full_26) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_12 = paddle._C_ops.where(not_equal_6, full_35, subtract_6) + del not_equal_6, subtract_6 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_6 = paddle._C_ops.equal(where_12, full_26) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_13 = paddle._C_ops.where(equal_6, full_36, where_12) + del equal_6, where_12 + + # pd_op.matmul: (256x49x1152xf32) <- (256x49x384xf32, 384x1152xf32) + matmul_67 = paddle._C_ops.matmul(reshape_117, parameter_136, False, False) + del parameter_136 + + # pd_op.add: (256x49x1152xf32) <- (256x49x1152xf32, 1152xf32) + add_91 = paddle._C_ops.add(matmul_67, parameter_135) + del parameter_135 + + # pd_op.reshape: (256x49x3x12x32xf32) <- (256x49x1152xf32, 5xi64) + reshape_293 = paddle._C_ops.reshape(add_91, full_int_array_59) + + # pd_op.transpose: (3x256x12x49x32xf32) <- (256x49x3x12x32xf32) + transpose_80 = paddle._C_ops.transpose(reshape_293, [2, 0, 3, 1, 4]) + del reshape_293 + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_50 = paddle._C_ops.slice( + transpose_80, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_51 = paddle._C_ops.slice( + transpose_80, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_13 = paddle._C_ops.slice( + transpose_80, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (256x12x49x32xf32) <- (256x12x49x32xf32, 1xf32) + scale_13 = paddle._C_ops.scale(slice_50, full_0, float("0"), True) + del slice_50 + + # pd_op.transpose: (256x12x32x49xf32) <- (256x12x49x32xf32) + transpose_81 = paddle._C_ops.transpose(slice_51, [0, 1, 3, 2]) + del slice_51 + + # pd_op.matmul: (256x12x49x49xf32) <- (256x12x49x32xf32, 256x12x32x49xf32) + matmul_68 = paddle._C_ops.matmul(scale_13, transpose_81, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_118 = paddle._C_ops.reshape(data_47, full_int_array_15) + del data_47 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_13 = paddle._C_ops.index_select(data_48, reshape_118, 0) + del data_48 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_294 = paddle._C_ops.reshape(index_select_13, full_int_array_16) + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_82 = paddle._C_ops.transpose(reshape_294, [2, 0, 1]) + del reshape_294 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_19 = paddle._C_ops.unsqueeze(transpose_82, full_int_array_0) + + # pd_op.add: (256x12x49x49xf32) <- (256x12x49x49xf32, 1x12x49x49xf32) + add_92 = paddle._C_ops.add(matmul_68, unsqueeze_19) + + # pd_op.reshape: (64x4x12x49x49xf32) <- (256x12x49x49xf32, 5xi64) + reshape_119 = paddle._C_ops.reshape(add_92, full_int_array_65) + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_57 = paddle._C_ops.unsqueeze(where_13, full_int_array_1) + del where_13 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_20 = paddle._C_ops.unsqueeze(unsqueeze_57, full_int_array_0) + del unsqueeze_57 + + # pd_op.add: (64x4x12x49x49xf32) <- (64x4x12x49x49xf32, 1x4x1x49x49xf32) + add_93 = paddle._C_ops.add(reshape_119, unsqueeze_20) + + # pd_op.reshape: (256x12x49x49xf32) <- (64x4x12x49x49xf32, 4xi64) + reshape_295 = paddle._C_ops.reshape(add_93, full_int_array_66) + + # pd_op.softmax: (256x12x49x49xf32) <- (256x12x49x49xf32) + softmax_13 = paddle._C_ops.softmax(reshape_295, -1) + del reshape_295 + + # pd_op.matmul: (256x12x49x32xf32) <- (256x12x49x49xf32, 256x12x49x32xf32) + matmul_137 = paddle._C_ops.matmul(softmax_13, slice_13, False, False) + + # pd_op.transpose: (256x49x12x32xf32) <- (256x12x49x32xf32) + transpose_83 = paddle._C_ops.transpose(matmul_137, [0, 2, 1, 3]) + del matmul_137 + + # pd_op.reshape: (256x49x384xf32) <- (256x49x12x32xf32, 3xi64) + reshape_120 = paddle._C_ops.reshape(transpose_83, full_int_array_60) + + # pd_op.matmul: (256x49x384xf32) <- (256x49x384xf32, 384x384xf32) + matmul_69 = paddle._C_ops.matmul(reshape_120, parameter_134, False, False) + del parameter_134 + + # pd_op.add: (256x49x384xf32) <- (256x49x384xf32, 384xf32) + add_94 = paddle._C_ops.add(matmul_69, parameter_133) + del parameter_133 + + # pd_op.reshape: (256x7x7x384xf32) <- (256x49x384xf32, 4xi64) + reshape_121 = paddle._C_ops.reshape(add_94, full_int_array_57) + + # pd_op.reshape: (64x2x2x7x7x384xf32) <- (256x7x7x384xf32, 6xi64) + reshape_296 = paddle._C_ops.reshape(reshape_121, full_int_array_61) + + # pd_op.transpose: (64x2x7x2x7x384xf32) <- (64x2x2x7x7x384xf32) + transpose_84 = paddle._C_ops.transpose(reshape_296, [0, 1, 3, 2, 4, 5]) + del reshape_296 + + # pd_op.reshape: (64x14x14x384xf32) <- (64x2x7x2x7x384xf32, 4xi64) + reshape_122 = paddle._C_ops.reshape(transpose_84, full_int_array_62) + + # pd_op.roll: (64x14x14x384xf32) <- (64x14x14x384xf32, 2xi64) + roll_13 = paddle._C_ops.roll(reshape_122, full_int_array_5, [1, 2]) + + # pd_op.reshape: (64x196x384xf32) <- (64x14x14x384xf32, 3xi64) + reshape_123 = paddle._C_ops.reshape(roll_13, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_14 = paddle._C_ops.full( + [], + float("0.830435"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_153 = full_14 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_24 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_201 = paddle._C_ops.add(full_14, uniform_24) + del uniform_24 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_24 = paddle._C_ops.floor(add_201) + del add_201 + + # pd_op.divide: (64x196x384xf32) <- (64x196x384xf32, xf32) + divide_24 = paddle._C_ops.divide(reshape_123, full_14) + + # pd_op.multiply: (64x196x384xf32) <- (64x196x384xf32, 64x1x1xf32) + multiply_24 = paddle._C_ops.multiply(divide_24, floor_24) + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 64x196x384xf32) + add_95 = paddle._C_ops.add(add_90, multiply_24) + + # pd_op.layer_norm: (64x196x384xf32, 64x196xf32, 64x196xf32) <- (64x196x384xf32, 384xf32, 384xf32) + layer_norm_90, layer_norm_91, layer_norm_92 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_95, parameter_132, parameter_131, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_131, parameter_132 + + # pd_op.matmul: (64x196x1536xf32) <- (64x196x384xf32, 384x1536xf32) + matmul_70 = paddle._C_ops.matmul(layer_norm_90, parameter_130, False, False) + del parameter_130 + + # pd_op.add: (64x196x1536xf32) <- (64x196x1536xf32, 1536xf32) + add_96 = paddle._C_ops.add(matmul_70, parameter_129) + del parameter_129 + + # pd_op.gelu: (64x196x1536xf32) <- (64x196x1536xf32) + gelu_13 = paddle._C_ops.gelu(add_96, False) + + # pd_op.matmul: (64x196x384xf32) <- (64x196x1536xf32, 1536x384xf32) + matmul_71 = paddle._C_ops.matmul(gelu_13, parameter_128, False, False) + del parameter_128 + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 384xf32) + add_97 = paddle._C_ops.add(matmul_71, parameter_127) + del parameter_127 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_25 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_202 = paddle._C_ops.add(full_14, uniform_25) + del uniform_25 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_25 = paddle._C_ops.floor(add_202) + del add_202 + + # pd_op.divide: (64x196x384xf32) <- (64x196x384xf32, xf32) + divide_25 = paddle._C_ops.divide(add_97, full_14) + + # pd_op.multiply: (64x196x384xf32) <- (64x196x384xf32, 64x1x1xf32) + multiply_25 = paddle._C_ops.multiply(divide_25, floor_25) + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 64x196x384xf32) + add_98 = paddle._C_ops.add(add_95, multiply_25) + + # pd_op.layer_norm: (64x196x384xf32, 64x196xf32, 64x196xf32) <- (64x196x384xf32, 384xf32, 384xf32) + layer_norm_93, layer_norm_94, layer_norm_95 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_98, parameter_126, parameter_125, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_125, parameter_126 + + # pd_op.reshape: (64x14x14x384xf32) <- (64x196x384xf32, 4xi64) + reshape_124 = paddle._C_ops.reshape(layer_norm_93, full_int_array_55) + + # pd_op.reshape: (64x2x7x2x7x384xf32) <- (64x14x14x384xf32, 6xi64) + reshape_297 = paddle._C_ops.reshape(reshape_124, full_int_array_56) + + # pd_op.transpose: (64x2x2x7x7x384xf32) <- (64x2x7x2x7x384xf32) + transpose_85 = paddle._C_ops.transpose(reshape_297, [0, 1, 3, 2, 4, 5]) + del reshape_297 + + # pd_op.reshape: (256x7x7x384xf32) <- (64x2x2x7x7x384xf32, 4xi64) + reshape_125 = paddle._C_ops.reshape(transpose_85, full_int_array_57) + + # pd_op.reshape: (256x49x384xf32) <- (256x7x7x384xf32, 3xi64) + reshape_126 = paddle._C_ops.reshape(reshape_125, full_int_array_58) + + # pd_op.matmul: (256x49x1152xf32) <- (256x49x384xf32, 384x1152xf32) + matmul_72 = paddle._C_ops.matmul(reshape_126, parameter_124, False, False) + del parameter_124 + + # pd_op.add: (256x49x1152xf32) <- (256x49x1152xf32, 1152xf32) + add_99 = paddle._C_ops.add(matmul_72, parameter_123) + del parameter_123 + + # pd_op.reshape: (256x49x3x12x32xf32) <- (256x49x1152xf32, 5xi64) + reshape_298 = paddle._C_ops.reshape(add_99, full_int_array_59) + + # pd_op.transpose: (3x256x12x49x32xf32) <- (256x49x3x12x32xf32) + transpose_86 = paddle._C_ops.transpose(reshape_298, [2, 0, 3, 1, 4]) + del reshape_298 + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_52 = paddle._C_ops.slice( + transpose_86, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_53 = paddle._C_ops.slice( + transpose_86, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_14 = paddle._C_ops.slice( + transpose_86, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (256x12x49x32xf32) <- (256x12x49x32xf32, 1xf32) + scale_14 = paddle._C_ops.scale(slice_52, full_0, float("0"), True) + del slice_52 + + # pd_op.transpose: (256x12x32x49xf32) <- (256x12x49x32xf32) + transpose_87 = paddle._C_ops.transpose(slice_53, [0, 1, 3, 2]) + del slice_53 + + # pd_op.matmul: (256x12x49x49xf32) <- (256x12x49x32xf32, 256x12x32x49xf32) + matmul_73 = paddle._C_ops.matmul(scale_14, transpose_87, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_127 = paddle._C_ops.reshape(data_1, full_int_array_15) + del data_1 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_14 = paddle._C_ops.index_select(data_2, reshape_127, 0) + del data_2 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_299 = paddle._C_ops.reshape(index_select_14, full_int_array_16) + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_88 = paddle._C_ops.transpose(reshape_299, [2, 0, 1]) + del reshape_299 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_21 = paddle._C_ops.unsqueeze(transpose_88, full_int_array_0) + + # pd_op.add: (256x12x49x49xf32) <- (256x12x49x49xf32, 1x12x49x49xf32) + add_203 = paddle._C_ops.add(matmul_73, unsqueeze_21) + + # pd_op.softmax: (256x12x49x49xf32) <- (256x12x49x49xf32) + softmax_14 = paddle._C_ops.softmax(add_203, -1) + del add_203 + + # pd_op.matmul: (256x12x49x32xf32) <- (256x12x49x49xf32, 256x12x49x32xf32) + matmul_138 = paddle._C_ops.matmul(softmax_14, slice_14, False, False) + + # pd_op.transpose: (256x49x12x32xf32) <- (256x12x49x32xf32) + transpose_89 = paddle._C_ops.transpose(matmul_138, [0, 2, 1, 3]) + del matmul_138 + + # pd_op.reshape: (256x49x384xf32) <- (256x49x12x32xf32, 3xi64) + reshape_128 = paddle._C_ops.reshape(transpose_89, full_int_array_60) + + # pd_op.matmul: (256x49x384xf32) <- (256x49x384xf32, 384x384xf32) + matmul_74 = paddle._C_ops.matmul(reshape_128, parameter_122, False, False) + del parameter_122 + + # pd_op.add: (256x49x384xf32) <- (256x49x384xf32, 384xf32) + add_100 = paddle._C_ops.add(matmul_74, parameter_121) + del parameter_121 + + # pd_op.reshape: (256x7x7x384xf32) <- (256x49x384xf32, 4xi64) + reshape_129 = paddle._C_ops.reshape(add_100, full_int_array_57) + + # pd_op.reshape: (64x2x2x7x7x384xf32) <- (256x7x7x384xf32, 6xi64) + reshape_300 = paddle._C_ops.reshape(reshape_129, full_int_array_61) + + # pd_op.transpose: (64x2x7x2x7x384xf32) <- (64x2x2x7x7x384xf32) + transpose_90 = paddle._C_ops.transpose(reshape_300, [0, 1, 3, 2, 4, 5]) + del reshape_300 + + # pd_op.reshape: (64x14x14x384xf32) <- (64x2x7x2x7x384xf32, 4xi64) + reshape_130 = paddle._C_ops.reshape(transpose_90, full_int_array_62) + + # pd_op.reshape: (64x196x384xf32) <- (64x14x14x384xf32, 3xi64) + reshape_131 = paddle._C_ops.reshape(reshape_130, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_15 = paddle._C_ops.full( + [], + float("0.817391"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_162 = full_15 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_26 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_204 = paddle._C_ops.add(full_15, uniform_26) + del uniform_26 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_26 = paddle._C_ops.floor(add_204) + del add_204 + + # pd_op.divide: (64x196x384xf32) <- (64x196x384xf32, xf32) + divide_26 = paddle._C_ops.divide(reshape_131, full_15) + + # pd_op.multiply: (64x196x384xf32) <- (64x196x384xf32, 64x1x1xf32) + multiply_26 = paddle._C_ops.multiply(divide_26, floor_26) + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 64x196x384xf32) + add_101 = paddle._C_ops.add(add_98, multiply_26) + + # pd_op.layer_norm: (64x196x384xf32, 64x196xf32, 64x196xf32) <- (64x196x384xf32, 384xf32, 384xf32) + layer_norm_96, layer_norm_97, layer_norm_98 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_101, parameter_120, parameter_119, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_119, parameter_120 + + # pd_op.matmul: (64x196x1536xf32) <- (64x196x384xf32, 384x1536xf32) + matmul_75 = paddle._C_ops.matmul(layer_norm_96, parameter_118, False, False) + del parameter_118 + + # pd_op.add: (64x196x1536xf32) <- (64x196x1536xf32, 1536xf32) + add_102 = paddle._C_ops.add(matmul_75, parameter_117) + del parameter_117 + + # pd_op.gelu: (64x196x1536xf32) <- (64x196x1536xf32) + gelu_14 = paddle._C_ops.gelu(add_102, False) + + # pd_op.matmul: (64x196x384xf32) <- (64x196x1536xf32, 1536x384xf32) + matmul_76 = paddle._C_ops.matmul(gelu_14, parameter_116, False, False) + del parameter_116 + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 384xf32) + add_103 = paddle._C_ops.add(matmul_76, parameter_115) + del parameter_115 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_27 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_205 = paddle._C_ops.add(full_15, uniform_27) + del uniform_27 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_27 = paddle._C_ops.floor(add_205) + del add_205 + + # pd_op.divide: (64x196x384xf32) <- (64x196x384xf32, xf32) + divide_27 = paddle._C_ops.divide(add_103, full_15) + + # pd_op.multiply: (64x196x384xf32) <- (64x196x384xf32, 64x1x1xf32) + multiply_27 = paddle._C_ops.multiply(divide_27, floor_27) + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 64x196x384xf32) + add_104 = paddle._C_ops.add(add_101, multiply_27) + + # pd_op.layer_norm: (64x196x384xf32, 64x196xf32, 64x196xf32) <- (64x196x384xf32, 384xf32, 384xf32) + layer_norm_99, layer_norm_100, layer_norm_101 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_104, parameter_114, parameter_113, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_113, parameter_114 + + # pd_op.reshape: (64x14x14x384xf32) <- (64x196x384xf32, 4xi64) + reshape_132 = paddle._C_ops.reshape(layer_norm_99, full_int_array_55) + + # pd_op.roll: (64x14x14x384xf32) <- (64x14x14x384xf32, 2xi64) + roll_14 = paddle._C_ops.roll(reshape_132, full_int_array_4, [1, 2]) + + # pd_op.reshape: (64x2x7x2x7x384xf32) <- (64x14x14x384xf32, 6xi64) + reshape_301 = paddle._C_ops.reshape(roll_14, full_int_array_56) + + # pd_op.transpose: (64x2x2x7x7x384xf32) <- (64x2x7x2x7x384xf32) + transpose_91 = paddle._C_ops.transpose(reshape_301, [0, 1, 3, 2, 4, 5]) + del reshape_301 + + # pd_op.reshape: (256x7x7x384xf32) <- (64x2x2x7x7x384xf32, 4xi64) + reshape_133 = paddle._C_ops.reshape(transpose_91, full_int_array_57) + + # pd_op.reshape: (256x49x384xf32) <- (256x7x7x384xf32, 3xi64) + reshape_134 = paddle._C_ops.reshape(reshape_133, full_int_array_58) + + # pd_op.full: (1x14x14x1xf32) <- () + full_41 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__68 = paddle._C_ops.set_value_( + full_41, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_41 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__69 = paddle._C_ops.set_value_( + set_value__68, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__68 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__70 = paddle._C_ops.set_value_( + set_value__69, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__69 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__71 = paddle._C_ops.set_value_( + set_value__70, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__70 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__72 = paddle._C_ops.set_value_( + set_value__71, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__71 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__73 = paddle._C_ops.set_value_( + set_value__72, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__72 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__74 = paddle._C_ops.set_value_( + set_value__73, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__73 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__75 = paddle._C_ops.set_value_( + set_value__74, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__74 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__7 = paddle._C_ops.set_value_( + set_value__75, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__75 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_302 = paddle._C_ops.reshape(set_value__7, full_int_array_64) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_153 = paddle._C_ops.transpose(reshape_302, [0, 1, 3, 2, 4, 5]) + del reshape_302 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_303 = paddle._C_ops.reshape(transpose_153, full_int_array_36) + del transpose_153 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_304 = paddle._C_ops.reshape(reshape_303, full_int_array_37) + del reshape_303 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_58 = paddle._C_ops.unsqueeze(reshape_304, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_59 = paddle._C_ops.unsqueeze(reshape_304, full_int_array_2) + del reshape_304 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_7 = paddle._C_ops.subtract(unsqueeze_58, unsqueeze_59) + del unsqueeze_58, unsqueeze_59 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_7 = paddle._C_ops.not_equal(subtract_7, full_26) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_14 = paddle._C_ops.where(not_equal_7, full_35, subtract_7) + del not_equal_7, subtract_7 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_7 = paddle._C_ops.equal(where_14, full_26) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_15 = paddle._C_ops.where(equal_7, full_36, where_14) + del equal_7, where_14 + + # pd_op.matmul: (256x49x1152xf32) <- (256x49x384xf32, 384x1152xf32) + matmul_77 = paddle._C_ops.matmul(reshape_134, parameter_112, False, False) + del parameter_112 + + # pd_op.add: (256x49x1152xf32) <- (256x49x1152xf32, 1152xf32) + add_105 = paddle._C_ops.add(matmul_77, parameter_111) + del parameter_111 + + # pd_op.reshape: (256x49x3x12x32xf32) <- (256x49x1152xf32, 5xi64) + reshape_305 = paddle._C_ops.reshape(add_105, full_int_array_59) + + # pd_op.transpose: (3x256x12x49x32xf32) <- (256x49x3x12x32xf32) + transpose_92 = paddle._C_ops.transpose(reshape_305, [2, 0, 3, 1, 4]) + del reshape_305 + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_54 = paddle._C_ops.slice( + transpose_92, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_55 = paddle._C_ops.slice( + transpose_92, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_15 = paddle._C_ops.slice( + transpose_92, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (256x12x49x32xf32) <- (256x12x49x32xf32, 1xf32) + scale_15 = paddle._C_ops.scale(slice_54, full_0, float("0"), True) + del slice_54 + + # pd_op.transpose: (256x12x32x49xf32) <- (256x12x49x32xf32) + transpose_93 = paddle._C_ops.transpose(slice_55, [0, 1, 3, 2]) + del slice_55 + + # pd_op.matmul: (256x12x49x49xf32) <- (256x12x49x32xf32, 256x12x32x49xf32) + matmul_78 = paddle._C_ops.matmul(scale_15, transpose_93, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_135 = paddle._C_ops.reshape(data_3, full_int_array_15) + del data_3 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_15 = paddle._C_ops.index_select(data_4, reshape_135, 0) + del data_4 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_306 = paddle._C_ops.reshape(index_select_15, full_int_array_16) + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_94 = paddle._C_ops.transpose(reshape_306, [2, 0, 1]) + del reshape_306 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_22 = paddle._C_ops.unsqueeze(transpose_94, full_int_array_0) + + # pd_op.add: (256x12x49x49xf32) <- (256x12x49x49xf32, 1x12x49x49xf32) + add_106 = paddle._C_ops.add(matmul_78, unsqueeze_22) + + # pd_op.reshape: (64x4x12x49x49xf32) <- (256x12x49x49xf32, 5xi64) + reshape_136 = paddle._C_ops.reshape(add_106, full_int_array_65) + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_60 = paddle._C_ops.unsqueeze(where_15, full_int_array_1) + del where_15 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_23 = paddle._C_ops.unsqueeze(unsqueeze_60, full_int_array_0) + del unsqueeze_60 + + # pd_op.add: (64x4x12x49x49xf32) <- (64x4x12x49x49xf32, 1x4x1x49x49xf32) + add_107 = paddle._C_ops.add(reshape_136, unsqueeze_23) + + # pd_op.reshape: (256x12x49x49xf32) <- (64x4x12x49x49xf32, 4xi64) + reshape_307 = paddle._C_ops.reshape(add_107, full_int_array_66) + + # pd_op.softmax: (256x12x49x49xf32) <- (256x12x49x49xf32) + softmax_15 = paddle._C_ops.softmax(reshape_307, -1) + del reshape_307 + + # pd_op.matmul: (256x12x49x32xf32) <- (256x12x49x49xf32, 256x12x49x32xf32) + matmul_139 = paddle._C_ops.matmul(softmax_15, slice_15, False, False) + + # pd_op.transpose: (256x49x12x32xf32) <- (256x12x49x32xf32) + transpose_95 = paddle._C_ops.transpose(matmul_139, [0, 2, 1, 3]) + del matmul_139 + + # pd_op.reshape: (256x49x384xf32) <- (256x49x12x32xf32, 3xi64) + reshape_137 = paddle._C_ops.reshape(transpose_95, full_int_array_60) + + # pd_op.matmul: (256x49x384xf32) <- (256x49x384xf32, 384x384xf32) + matmul_79 = paddle._C_ops.matmul(reshape_137, parameter_110, False, False) + del parameter_110 + + # pd_op.add: (256x49x384xf32) <- (256x49x384xf32, 384xf32) + add_108 = paddle._C_ops.add(matmul_79, parameter_109) + del parameter_109 + + # pd_op.reshape: (256x7x7x384xf32) <- (256x49x384xf32, 4xi64) + reshape_138 = paddle._C_ops.reshape(add_108, full_int_array_57) + + # pd_op.reshape: (64x2x2x7x7x384xf32) <- (256x7x7x384xf32, 6xi64) + reshape_308 = paddle._C_ops.reshape(reshape_138, full_int_array_61) + + # pd_op.transpose: (64x2x7x2x7x384xf32) <- (64x2x2x7x7x384xf32) + transpose_96 = paddle._C_ops.transpose(reshape_308, [0, 1, 3, 2, 4, 5]) + del reshape_308 + + # pd_op.reshape: (64x14x14x384xf32) <- (64x2x7x2x7x384xf32, 4xi64) + reshape_139 = paddle._C_ops.reshape(transpose_96, full_int_array_62) + + # pd_op.roll: (64x14x14x384xf32) <- (64x14x14x384xf32, 2xi64) + roll_15 = paddle._C_ops.roll(reshape_139, full_int_array_5, [1, 2]) + + # pd_op.reshape: (64x196x384xf32) <- (64x14x14x384xf32, 3xi64) + reshape_140 = paddle._C_ops.reshape(roll_15, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_16 = paddle._C_ops.full( + [], + float("0.804348"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_173 = full_16 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_28 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_206 = paddle._C_ops.add(full_16, uniform_28) + del uniform_28 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_28 = paddle._C_ops.floor(add_206) + del add_206 + + # pd_op.divide: (64x196x384xf32) <- (64x196x384xf32, xf32) + divide_28 = paddle._C_ops.divide(reshape_140, full_16) + + # pd_op.multiply: (64x196x384xf32) <- (64x196x384xf32, 64x1x1xf32) + multiply_28 = paddle._C_ops.multiply(divide_28, floor_28) + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 64x196x384xf32) + add_109 = paddle._C_ops.add(add_104, multiply_28) + + # pd_op.layer_norm: (64x196x384xf32, 64x196xf32, 64x196xf32) <- (64x196x384xf32, 384xf32, 384xf32) + layer_norm_102, layer_norm_103, layer_norm_104 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_109, parameter_108, parameter_107, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_107, parameter_108 + + # pd_op.matmul: (64x196x1536xf32) <- (64x196x384xf32, 384x1536xf32) + matmul_80 = paddle._C_ops.matmul(layer_norm_102, parameter_106, False, False) + del parameter_106 + + # pd_op.add: (64x196x1536xf32) <- (64x196x1536xf32, 1536xf32) + add_110 = paddle._C_ops.add(matmul_80, parameter_105) + del parameter_105 + + # pd_op.gelu: (64x196x1536xf32) <- (64x196x1536xf32) + gelu_15 = paddle._C_ops.gelu(add_110, False) + + # pd_op.matmul: (64x196x384xf32) <- (64x196x1536xf32, 1536x384xf32) + matmul_81 = paddle._C_ops.matmul(gelu_15, parameter_104, False, False) + del parameter_104 + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 384xf32) + add_111 = paddle._C_ops.add(matmul_81, parameter_103) + del parameter_103 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_29 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_207 = paddle._C_ops.add(full_16, uniform_29) + del uniform_29 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_29 = paddle._C_ops.floor(add_207) + del add_207 + + # pd_op.divide: (64x196x384xf32) <- (64x196x384xf32, xf32) + divide_29 = paddle._C_ops.divide(add_111, full_16) + + # pd_op.multiply: (64x196x384xf32) <- (64x196x384xf32, 64x1x1xf32) + multiply_29 = paddle._C_ops.multiply(divide_29, floor_29) + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 64x196x384xf32) + add_112 = paddle._C_ops.add(add_109, multiply_29) + + # pd_op.layer_norm: (64x196x384xf32, 64x196xf32, 64x196xf32) <- (64x196x384xf32, 384xf32, 384xf32) + layer_norm_105, layer_norm_106, layer_norm_107 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_112, parameter_102, parameter_101, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_101, parameter_102 + + # pd_op.reshape: (64x14x14x384xf32) <- (64x196x384xf32, 4xi64) + reshape_141 = paddle._C_ops.reshape(layer_norm_105, full_int_array_55) + + # pd_op.reshape: (64x2x7x2x7x384xf32) <- (64x14x14x384xf32, 6xi64) + reshape_309 = paddle._C_ops.reshape(reshape_141, full_int_array_56) + + # pd_op.transpose: (64x2x2x7x7x384xf32) <- (64x2x7x2x7x384xf32) + transpose_97 = paddle._C_ops.transpose(reshape_309, [0, 1, 3, 2, 4, 5]) + del reshape_309 + + # pd_op.reshape: (256x7x7x384xf32) <- (64x2x2x7x7x384xf32, 4xi64) + reshape_142 = paddle._C_ops.reshape(transpose_97, full_int_array_57) + + # pd_op.reshape: (256x49x384xf32) <- (256x7x7x384xf32, 3xi64) + reshape_143 = paddle._C_ops.reshape(reshape_142, full_int_array_58) + + # pd_op.matmul: (256x49x1152xf32) <- (256x49x384xf32, 384x1152xf32) + matmul_82 = paddle._C_ops.matmul(reshape_143, parameter_100, False, False) + del parameter_100 + + # pd_op.add: (256x49x1152xf32) <- (256x49x1152xf32, 1152xf32) + add_113 = paddle._C_ops.add(matmul_82, parameter_99) + del parameter_99 + + # pd_op.reshape: (256x49x3x12x32xf32) <- (256x49x1152xf32, 5xi64) + reshape_310 = paddle._C_ops.reshape(add_113, full_int_array_59) + + # pd_op.transpose: (3x256x12x49x32xf32) <- (256x49x3x12x32xf32) + transpose_98 = paddle._C_ops.transpose(reshape_310, [2, 0, 3, 1, 4]) + del reshape_310 + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_56 = paddle._C_ops.slice( + transpose_98, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_57 = paddle._C_ops.slice( + transpose_98, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_16 = paddle._C_ops.slice( + transpose_98, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (256x12x49x32xf32) <- (256x12x49x32xf32, 1xf32) + scale_16 = paddle._C_ops.scale(slice_56, full_0, float("0"), True) + del slice_56 + + # pd_op.transpose: (256x12x32x49xf32) <- (256x12x49x32xf32) + transpose_99 = paddle._C_ops.transpose(slice_57, [0, 1, 3, 2]) + del slice_57 + + # pd_op.matmul: (256x12x49x49xf32) <- (256x12x49x32xf32, 256x12x32x49xf32) + matmul_83 = paddle._C_ops.matmul(scale_16, transpose_99, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_144 = paddle._C_ops.reshape(data_5, full_int_array_15) + del data_5 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_16 = paddle._C_ops.index_select(data_6, reshape_144, 0) + del data_6 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_311 = paddle._C_ops.reshape(index_select_16, full_int_array_16) + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_100 = paddle._C_ops.transpose(reshape_311, [2, 0, 1]) + del reshape_311 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_24 = paddle._C_ops.unsqueeze(transpose_100, full_int_array_0) + + # pd_op.add: (256x12x49x49xf32) <- (256x12x49x49xf32, 1x12x49x49xf32) + add_208 = paddle._C_ops.add(matmul_83, unsqueeze_24) + + # pd_op.softmax: (256x12x49x49xf32) <- (256x12x49x49xf32) + softmax_16 = paddle._C_ops.softmax(add_208, -1) + del add_208 + + # pd_op.matmul: (256x12x49x32xf32) <- (256x12x49x49xf32, 256x12x49x32xf32) + matmul_140 = paddle._C_ops.matmul(softmax_16, slice_16, False, False) + + # pd_op.transpose: (256x49x12x32xf32) <- (256x12x49x32xf32) + transpose_101 = paddle._C_ops.transpose(matmul_140, [0, 2, 1, 3]) + del matmul_140 + + # pd_op.reshape: (256x49x384xf32) <- (256x49x12x32xf32, 3xi64) + reshape_145 = paddle._C_ops.reshape(transpose_101, full_int_array_60) + + # pd_op.matmul: (256x49x384xf32) <- (256x49x384xf32, 384x384xf32) + matmul_84 = paddle._C_ops.matmul(reshape_145, parameter_98, False, False) + del parameter_98 + + # pd_op.add: (256x49x384xf32) <- (256x49x384xf32, 384xf32) + add_114 = paddle._C_ops.add(matmul_84, parameter_97) + del parameter_97 + + # pd_op.reshape: (256x7x7x384xf32) <- (256x49x384xf32, 4xi64) + reshape_146 = paddle._C_ops.reshape(add_114, full_int_array_57) + + # pd_op.reshape: (64x2x2x7x7x384xf32) <- (256x7x7x384xf32, 6xi64) + reshape_312 = paddle._C_ops.reshape(reshape_146, full_int_array_61) + + # pd_op.transpose: (64x2x7x2x7x384xf32) <- (64x2x2x7x7x384xf32) + transpose_102 = paddle._C_ops.transpose(reshape_312, [0, 1, 3, 2, 4, 5]) + del reshape_312 + + # pd_op.reshape: (64x14x14x384xf32) <- (64x2x7x2x7x384xf32, 4xi64) + reshape_147 = paddle._C_ops.reshape(transpose_102, full_int_array_62) + + # pd_op.reshape: (64x196x384xf32) <- (64x14x14x384xf32, 3xi64) + reshape_148 = paddle._C_ops.reshape(reshape_147, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_17 = paddle._C_ops.full( + [], + float("0.791304"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_182 = full_17 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_30 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_209 = paddle._C_ops.add(full_17, uniform_30) + del uniform_30 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_30 = paddle._C_ops.floor(add_209) + del add_209 + + # pd_op.divide: (64x196x384xf32) <- (64x196x384xf32, xf32) + divide_30 = paddle._C_ops.divide(reshape_148, full_17) + + # pd_op.multiply: (64x196x384xf32) <- (64x196x384xf32, 64x1x1xf32) + multiply_30 = paddle._C_ops.multiply(divide_30, floor_30) + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 64x196x384xf32) + add_115 = paddle._C_ops.add(add_112, multiply_30) + + # pd_op.layer_norm: (64x196x384xf32, 64x196xf32, 64x196xf32) <- (64x196x384xf32, 384xf32, 384xf32) + layer_norm_108, layer_norm_109, layer_norm_110 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_115, parameter_96, parameter_95, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_95, parameter_96 + + # pd_op.matmul: (64x196x1536xf32) <- (64x196x384xf32, 384x1536xf32) + matmul_85 = paddle._C_ops.matmul(layer_norm_108, parameter_94, False, False) + del parameter_94 + + # pd_op.add: (64x196x1536xf32) <- (64x196x1536xf32, 1536xf32) + add_116 = paddle._C_ops.add(matmul_85, parameter_93) + del parameter_93 + + # pd_op.gelu: (64x196x1536xf32) <- (64x196x1536xf32) + gelu_16 = paddle._C_ops.gelu(add_116, False) + + # pd_op.matmul: (64x196x384xf32) <- (64x196x1536xf32, 1536x384xf32) + matmul_86 = paddle._C_ops.matmul(gelu_16, parameter_92, False, False) + del parameter_92 + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 384xf32) + add_117 = paddle._C_ops.add(matmul_86, parameter_91) + del parameter_91 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_31 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_210 = paddle._C_ops.add(full_17, uniform_31) + del uniform_31 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_31 = paddle._C_ops.floor(add_210) + del add_210 + + # pd_op.divide: (64x196x384xf32) <- (64x196x384xf32, xf32) + divide_31 = paddle._C_ops.divide(add_117, full_17) + + # pd_op.multiply: (64x196x384xf32) <- (64x196x384xf32, 64x1x1xf32) + multiply_31 = paddle._C_ops.multiply(divide_31, floor_31) + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 64x196x384xf32) + add_118 = paddle._C_ops.add(add_115, multiply_31) + + # pd_op.layer_norm: (64x196x384xf32, 64x196xf32, 64x196xf32) <- (64x196x384xf32, 384xf32, 384xf32) + layer_norm_111, layer_norm_112, layer_norm_113 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_118, parameter_90, parameter_89, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_89, parameter_90 + + # pd_op.reshape: (64x14x14x384xf32) <- (64x196x384xf32, 4xi64) + reshape_149 = paddle._C_ops.reshape(layer_norm_111, full_int_array_55) + + # pd_op.roll: (64x14x14x384xf32) <- (64x14x14x384xf32, 2xi64) + roll_16 = paddle._C_ops.roll(reshape_149, full_int_array_4, [1, 2]) + + # pd_op.reshape: (64x2x7x2x7x384xf32) <- (64x14x14x384xf32, 6xi64) + reshape_313 = paddle._C_ops.reshape(roll_16, full_int_array_56) + + # pd_op.transpose: (64x2x2x7x7x384xf32) <- (64x2x7x2x7x384xf32) + transpose_103 = paddle._C_ops.transpose(reshape_313, [0, 1, 3, 2, 4, 5]) + del reshape_313 + + # pd_op.reshape: (256x7x7x384xf32) <- (64x2x2x7x7x384xf32, 4xi64) + reshape_150 = paddle._C_ops.reshape(transpose_103, full_int_array_57) + + # pd_op.reshape: (256x49x384xf32) <- (256x7x7x384xf32, 3xi64) + reshape_151 = paddle._C_ops.reshape(reshape_150, full_int_array_58) + + # pd_op.full: (1x14x14x1xf32) <- () + full_42 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__76 = paddle._C_ops.set_value_( + full_42, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_42 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__77 = paddle._C_ops.set_value_( + set_value__76, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__76 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__78 = paddle._C_ops.set_value_( + set_value__77, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__77 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__79 = paddle._C_ops.set_value_( + set_value__78, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__78 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__80 = paddle._C_ops.set_value_( + set_value__79, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__79 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__81 = paddle._C_ops.set_value_( + set_value__80, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__80 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__82 = paddle._C_ops.set_value_( + set_value__81, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__81 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__83 = paddle._C_ops.set_value_( + set_value__82, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__82 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__8 = paddle._C_ops.set_value_( + set_value__83, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__83 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_314 = paddle._C_ops.reshape(set_value__8, full_int_array_64) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_154 = paddle._C_ops.transpose(reshape_314, [0, 1, 3, 2, 4, 5]) + del reshape_314 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_315 = paddle._C_ops.reshape(transpose_154, full_int_array_36) + del transpose_154 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_316 = paddle._C_ops.reshape(reshape_315, full_int_array_37) + del reshape_315 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_61 = paddle._C_ops.unsqueeze(reshape_316, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_62 = paddle._C_ops.unsqueeze(reshape_316, full_int_array_2) + del reshape_316 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_8 = paddle._C_ops.subtract(unsqueeze_61, unsqueeze_62) + del unsqueeze_61, unsqueeze_62 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_8 = paddle._C_ops.not_equal(subtract_8, full_26) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_16 = paddle._C_ops.where(not_equal_8, full_35, subtract_8) + del not_equal_8, subtract_8 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_8 = paddle._C_ops.equal(where_16, full_26) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_17 = paddle._C_ops.where(equal_8, full_36, where_16) + del equal_8, where_16 + + # pd_op.matmul: (256x49x1152xf32) <- (256x49x384xf32, 384x1152xf32) + matmul_87 = paddle._C_ops.matmul(reshape_151, parameter_88, False, False) + del parameter_88 + + # pd_op.add: (256x49x1152xf32) <- (256x49x1152xf32, 1152xf32) + add_119 = paddle._C_ops.add(matmul_87, parameter_87) + del parameter_87 + + # pd_op.reshape: (256x49x3x12x32xf32) <- (256x49x1152xf32, 5xi64) + reshape_317 = paddle._C_ops.reshape(add_119, full_int_array_59) + + # pd_op.transpose: (3x256x12x49x32xf32) <- (256x49x3x12x32xf32) + transpose_104 = paddle._C_ops.transpose(reshape_317, [2, 0, 3, 1, 4]) + del reshape_317 + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_58 = paddle._C_ops.slice( + transpose_104, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_59 = paddle._C_ops.slice( + transpose_104, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_17 = paddle._C_ops.slice( + transpose_104, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (256x12x49x32xf32) <- (256x12x49x32xf32, 1xf32) + scale_17 = paddle._C_ops.scale(slice_58, full_0, float("0"), True) + del slice_58 + + # pd_op.transpose: (256x12x32x49xf32) <- (256x12x49x32xf32) + transpose_105 = paddle._C_ops.transpose(slice_59, [0, 1, 3, 2]) + del slice_59 + + # pd_op.matmul: (256x12x49x49xf32) <- (256x12x49x32xf32, 256x12x32x49xf32) + matmul_88 = paddle._C_ops.matmul(scale_17, transpose_105, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_152 = paddle._C_ops.reshape(data_7, full_int_array_15) + del data_7 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_17 = paddle._C_ops.index_select(data_8, reshape_152, 0) + del data_8 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_318 = paddle._C_ops.reshape(index_select_17, full_int_array_16) + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_106 = paddle._C_ops.transpose(reshape_318, [2, 0, 1]) + del reshape_318 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_25 = paddle._C_ops.unsqueeze(transpose_106, full_int_array_0) + + # pd_op.add: (256x12x49x49xf32) <- (256x12x49x49xf32, 1x12x49x49xf32) + add_120 = paddle._C_ops.add(matmul_88, unsqueeze_25) + + # pd_op.reshape: (64x4x12x49x49xf32) <- (256x12x49x49xf32, 5xi64) + reshape_153 = paddle._C_ops.reshape(add_120, full_int_array_65) + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_63 = paddle._C_ops.unsqueeze(where_17, full_int_array_1) + del where_17 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_26 = paddle._C_ops.unsqueeze(unsqueeze_63, full_int_array_0) + del unsqueeze_63 + + # pd_op.add: (64x4x12x49x49xf32) <- (64x4x12x49x49xf32, 1x4x1x49x49xf32) + add_121 = paddle._C_ops.add(reshape_153, unsqueeze_26) + + # pd_op.reshape: (256x12x49x49xf32) <- (64x4x12x49x49xf32, 4xi64) + reshape_319 = paddle._C_ops.reshape(add_121, full_int_array_66) + + # pd_op.softmax: (256x12x49x49xf32) <- (256x12x49x49xf32) + softmax_17 = paddle._C_ops.softmax(reshape_319, -1) + del reshape_319 + + # pd_op.matmul: (256x12x49x32xf32) <- (256x12x49x49xf32, 256x12x49x32xf32) + matmul_141 = paddle._C_ops.matmul(softmax_17, slice_17, False, False) + + # pd_op.transpose: (256x49x12x32xf32) <- (256x12x49x32xf32) + transpose_107 = paddle._C_ops.transpose(matmul_141, [0, 2, 1, 3]) + del matmul_141 + + # pd_op.reshape: (256x49x384xf32) <- (256x49x12x32xf32, 3xi64) + reshape_154 = paddle._C_ops.reshape(transpose_107, full_int_array_60) + + # pd_op.matmul: (256x49x384xf32) <- (256x49x384xf32, 384x384xf32) + matmul_89 = paddle._C_ops.matmul(reshape_154, parameter_86, False, False) + del parameter_86 + + # pd_op.add: (256x49x384xf32) <- (256x49x384xf32, 384xf32) + add_122 = paddle._C_ops.add(matmul_89, parameter_85) + del parameter_85 + + # pd_op.reshape: (256x7x7x384xf32) <- (256x49x384xf32, 4xi64) + reshape_155 = paddle._C_ops.reshape(add_122, full_int_array_57) + + # pd_op.reshape: (64x2x2x7x7x384xf32) <- (256x7x7x384xf32, 6xi64) + reshape_320 = paddle._C_ops.reshape(reshape_155, full_int_array_61) + + # pd_op.transpose: (64x2x7x2x7x384xf32) <- (64x2x2x7x7x384xf32) + transpose_108 = paddle._C_ops.transpose(reshape_320, [0, 1, 3, 2, 4, 5]) + del reshape_320 + + # pd_op.reshape: (64x14x14x384xf32) <- (64x2x7x2x7x384xf32, 4xi64) + reshape_156 = paddle._C_ops.reshape(transpose_108, full_int_array_62) + + # pd_op.roll: (64x14x14x384xf32) <- (64x14x14x384xf32, 2xi64) + roll_17 = paddle._C_ops.roll(reshape_156, full_int_array_5, [1, 2]) + + # pd_op.reshape: (64x196x384xf32) <- (64x14x14x384xf32, 3xi64) + reshape_157 = paddle._C_ops.reshape(roll_17, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_18 = paddle._C_ops.full( + [], + float("0.778261"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_193 = full_18 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_32 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_211 = paddle._C_ops.add(full_18, uniform_32) + del uniform_32 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_32 = paddle._C_ops.floor(add_211) + del add_211 + + # pd_op.divide: (64x196x384xf32) <- (64x196x384xf32, xf32) + divide_32 = paddle._C_ops.divide(reshape_157, full_18) + + # pd_op.multiply: (64x196x384xf32) <- (64x196x384xf32, 64x1x1xf32) + multiply_32 = paddle._C_ops.multiply(divide_32, floor_32) + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 64x196x384xf32) + add_123 = paddle._C_ops.add(add_118, multiply_32) + + # pd_op.layer_norm: (64x196x384xf32, 64x196xf32, 64x196xf32) <- (64x196x384xf32, 384xf32, 384xf32) + layer_norm_114, layer_norm_115, layer_norm_116 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_123, parameter_84, parameter_83, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_83, parameter_84 + + # pd_op.matmul: (64x196x1536xf32) <- (64x196x384xf32, 384x1536xf32) + matmul_90 = paddle._C_ops.matmul(layer_norm_114, parameter_82, False, False) + del parameter_82 + + # pd_op.add: (64x196x1536xf32) <- (64x196x1536xf32, 1536xf32) + add_124 = paddle._C_ops.add(matmul_90, parameter_81) + del parameter_81 + + # pd_op.gelu: (64x196x1536xf32) <- (64x196x1536xf32) + gelu_17 = paddle._C_ops.gelu(add_124, False) + + # pd_op.matmul: (64x196x384xf32) <- (64x196x1536xf32, 1536x384xf32) + matmul_91 = paddle._C_ops.matmul(gelu_17, parameter_80, False, False) + del parameter_80 + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 384xf32) + add_125 = paddle._C_ops.add(matmul_91, parameter_79) + del parameter_79 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_33 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_212 = paddle._C_ops.add(full_18, uniform_33) + del uniform_33 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_33 = paddle._C_ops.floor(add_212) + del add_212 + + # pd_op.divide: (64x196x384xf32) <- (64x196x384xf32, xf32) + divide_33 = paddle._C_ops.divide(add_125, full_18) + + # pd_op.multiply: (64x196x384xf32) <- (64x196x384xf32, 64x1x1xf32) + multiply_33 = paddle._C_ops.multiply(divide_33, floor_33) + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 64x196x384xf32) + add_126 = paddle._C_ops.add(add_123, multiply_33) + + # pd_op.layer_norm: (64x196x384xf32, 64x196xf32, 64x196xf32) <- (64x196x384xf32, 384xf32, 384xf32) + layer_norm_117, layer_norm_118, layer_norm_119 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_126, parameter_78, parameter_77, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_77, parameter_78 + + # pd_op.reshape: (64x14x14x384xf32) <- (64x196x384xf32, 4xi64) + reshape_158 = paddle._C_ops.reshape(layer_norm_117, full_int_array_55) + + # pd_op.reshape: (64x2x7x2x7x384xf32) <- (64x14x14x384xf32, 6xi64) + reshape_321 = paddle._C_ops.reshape(reshape_158, full_int_array_56) + + # pd_op.transpose: (64x2x2x7x7x384xf32) <- (64x2x7x2x7x384xf32) + transpose_109 = paddle._C_ops.transpose(reshape_321, [0, 1, 3, 2, 4, 5]) + del reshape_321 + + # pd_op.reshape: (256x7x7x384xf32) <- (64x2x2x7x7x384xf32, 4xi64) + reshape_159 = paddle._C_ops.reshape(transpose_109, full_int_array_57) + + # pd_op.reshape: (256x49x384xf32) <- (256x7x7x384xf32, 3xi64) + reshape_160 = paddle._C_ops.reshape(reshape_159, full_int_array_58) + + # pd_op.matmul: (256x49x1152xf32) <- (256x49x384xf32, 384x1152xf32) + matmul_92 = paddle._C_ops.matmul(reshape_160, parameter_76, False, False) + del parameter_76 + + # pd_op.add: (256x49x1152xf32) <- (256x49x1152xf32, 1152xf32) + add_127 = paddle._C_ops.add(matmul_92, parameter_75) + del parameter_75 + + # pd_op.reshape: (256x49x3x12x32xf32) <- (256x49x1152xf32, 5xi64) + reshape_322 = paddle._C_ops.reshape(add_127, full_int_array_59) + + # pd_op.transpose: (3x256x12x49x32xf32) <- (256x49x3x12x32xf32) + transpose_110 = paddle._C_ops.transpose(reshape_322, [2, 0, 3, 1, 4]) + del reshape_322 + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_60 = paddle._C_ops.slice( + transpose_110, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_61 = paddle._C_ops.slice( + transpose_110, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_18 = paddle._C_ops.slice( + transpose_110, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (256x12x49x32xf32) <- (256x12x49x32xf32, 1xf32) + scale_18 = paddle._C_ops.scale(slice_60, full_0, float("0"), True) + del slice_60 + + # pd_op.transpose: (256x12x32x49xf32) <- (256x12x49x32xf32) + transpose_111 = paddle._C_ops.transpose(slice_61, [0, 1, 3, 2]) + del slice_61 + + # pd_op.matmul: (256x12x49x49xf32) <- (256x12x49x32xf32, 256x12x32x49xf32) + matmul_93 = paddle._C_ops.matmul(scale_18, transpose_111, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_161 = paddle._C_ops.reshape(data_9, full_int_array_15) + del data_9 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_18 = paddle._C_ops.index_select(data_10, reshape_161, 0) + del data_10 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_323 = paddle._C_ops.reshape(index_select_18, full_int_array_16) + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_112 = paddle._C_ops.transpose(reshape_323, [2, 0, 1]) + del reshape_323 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_27 = paddle._C_ops.unsqueeze(transpose_112, full_int_array_0) + + # pd_op.add: (256x12x49x49xf32) <- (256x12x49x49xf32, 1x12x49x49xf32) + add_213 = paddle._C_ops.add(matmul_93, unsqueeze_27) + + # pd_op.softmax: (256x12x49x49xf32) <- (256x12x49x49xf32) + softmax_18 = paddle._C_ops.softmax(add_213, -1) + del add_213 + + # pd_op.matmul: (256x12x49x32xf32) <- (256x12x49x49xf32, 256x12x49x32xf32) + matmul_142 = paddle._C_ops.matmul(softmax_18, slice_18, False, False) + + # pd_op.transpose: (256x49x12x32xf32) <- (256x12x49x32xf32) + transpose_113 = paddle._C_ops.transpose(matmul_142, [0, 2, 1, 3]) + del matmul_142 + + # pd_op.reshape: (256x49x384xf32) <- (256x49x12x32xf32, 3xi64) + reshape_162 = paddle._C_ops.reshape(transpose_113, full_int_array_60) + + # pd_op.matmul: (256x49x384xf32) <- (256x49x384xf32, 384x384xf32) + matmul_94 = paddle._C_ops.matmul(reshape_162, parameter_74, False, False) + del parameter_74 + + # pd_op.add: (256x49x384xf32) <- (256x49x384xf32, 384xf32) + add_128 = paddle._C_ops.add(matmul_94, parameter_73) + del parameter_73 + + # pd_op.reshape: (256x7x7x384xf32) <- (256x49x384xf32, 4xi64) + reshape_163 = paddle._C_ops.reshape(add_128, full_int_array_57) + + # pd_op.reshape: (64x2x2x7x7x384xf32) <- (256x7x7x384xf32, 6xi64) + reshape_324 = paddle._C_ops.reshape(reshape_163, full_int_array_61) + + # pd_op.transpose: (64x2x7x2x7x384xf32) <- (64x2x2x7x7x384xf32) + transpose_114 = paddle._C_ops.transpose(reshape_324, [0, 1, 3, 2, 4, 5]) + del reshape_324 + + # pd_op.reshape: (64x14x14x384xf32) <- (64x2x7x2x7x384xf32, 4xi64) + reshape_164 = paddle._C_ops.reshape(transpose_114, full_int_array_62) + + # pd_op.reshape: (64x196x384xf32) <- (64x14x14x384xf32, 3xi64) + reshape_165 = paddle._C_ops.reshape(reshape_164, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_19 = paddle._C_ops.full( + [], + float("0.765217"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_202 = full_19 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_34 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_214 = paddle._C_ops.add(full_19, uniform_34) + del uniform_34 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_34 = paddle._C_ops.floor(add_214) + del add_214 + + # pd_op.divide: (64x196x384xf32) <- (64x196x384xf32, xf32) + divide_34 = paddle._C_ops.divide(reshape_165, full_19) + + # pd_op.multiply: (64x196x384xf32) <- (64x196x384xf32, 64x1x1xf32) + multiply_34 = paddle._C_ops.multiply(divide_34, floor_34) + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 64x196x384xf32) + add_129 = paddle._C_ops.add(add_126, multiply_34) + + # pd_op.layer_norm: (64x196x384xf32, 64x196xf32, 64x196xf32) <- (64x196x384xf32, 384xf32, 384xf32) + layer_norm_120, layer_norm_121, layer_norm_122 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_129, parameter_72, parameter_71, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_71, parameter_72 + + # pd_op.matmul: (64x196x1536xf32) <- (64x196x384xf32, 384x1536xf32) + matmul_95 = paddle._C_ops.matmul(layer_norm_120, parameter_70, False, False) + del parameter_70 + + # pd_op.add: (64x196x1536xf32) <- (64x196x1536xf32, 1536xf32) + add_130 = paddle._C_ops.add(matmul_95, parameter_69) + del parameter_69 + + # pd_op.gelu: (64x196x1536xf32) <- (64x196x1536xf32) + gelu_18 = paddle._C_ops.gelu(add_130, False) + + # pd_op.matmul: (64x196x384xf32) <- (64x196x1536xf32, 1536x384xf32) + matmul_96 = paddle._C_ops.matmul(gelu_18, parameter_68, False, False) + del parameter_68 + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 384xf32) + add_131 = paddle._C_ops.add(matmul_96, parameter_67) + del parameter_67 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_35 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_215 = paddle._C_ops.add(full_19, uniform_35) + del uniform_35 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_35 = paddle._C_ops.floor(add_215) + del add_215 + + # pd_op.divide: (64x196x384xf32) <- (64x196x384xf32, xf32) + divide_35 = paddle._C_ops.divide(add_131, full_19) + + # pd_op.multiply: (64x196x384xf32) <- (64x196x384xf32, 64x1x1xf32) + multiply_35 = paddle._C_ops.multiply(divide_35, floor_35) + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 64x196x384xf32) + add_132 = paddle._C_ops.add(add_129, multiply_35) + + # pd_op.layer_norm: (64x196x384xf32, 64x196xf32, 64x196xf32) <- (64x196x384xf32, 384xf32, 384xf32) + layer_norm_123, layer_norm_124, layer_norm_125 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_132, parameter_66, parameter_65, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_65, parameter_66 + + # pd_op.reshape: (64x14x14x384xf32) <- (64x196x384xf32, 4xi64) + reshape_166 = paddle._C_ops.reshape(layer_norm_123, full_int_array_55) + + # pd_op.roll: (64x14x14x384xf32) <- (64x14x14x384xf32, 2xi64) + roll_18 = paddle._C_ops.roll(reshape_166, full_int_array_4, [1, 2]) + + # pd_op.reshape: (64x2x7x2x7x384xf32) <- (64x14x14x384xf32, 6xi64) + reshape_325 = paddle._C_ops.reshape(roll_18, full_int_array_56) + + # pd_op.transpose: (64x2x2x7x7x384xf32) <- (64x2x7x2x7x384xf32) + transpose_115 = paddle._C_ops.transpose(reshape_325, [0, 1, 3, 2, 4, 5]) + del reshape_325 + + # pd_op.reshape: (256x7x7x384xf32) <- (64x2x2x7x7x384xf32, 4xi64) + reshape_167 = paddle._C_ops.reshape(transpose_115, full_int_array_57) + + # pd_op.reshape: (256x49x384xf32) <- (256x7x7x384xf32, 3xi64) + reshape_168 = paddle._C_ops.reshape(reshape_167, full_int_array_58) + + # pd_op.full: (1x14x14x1xf32) <- () + full_43 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__84 = paddle._C_ops.set_value_( + full_43, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_43 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__85 = paddle._C_ops.set_value_( + set_value__84, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__84 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__86 = paddle._C_ops.set_value_( + set_value__85, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__85 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__87 = paddle._C_ops.set_value_( + set_value__86, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__86 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__88 = paddle._C_ops.set_value_( + set_value__87, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__87 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__89 = paddle._C_ops.set_value_( + set_value__88, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__88 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__90 = paddle._C_ops.set_value_( + set_value__89, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__89 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__91 = paddle._C_ops.set_value_( + set_value__90, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__90 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__9 = paddle._C_ops.set_value_( + set_value__91, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__91 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_326 = paddle._C_ops.reshape(set_value__9, full_int_array_64) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_155 = paddle._C_ops.transpose(reshape_326, [0, 1, 3, 2, 4, 5]) + del reshape_326 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_327 = paddle._C_ops.reshape(transpose_155, full_int_array_36) + del transpose_155 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_328 = paddle._C_ops.reshape(reshape_327, full_int_array_37) + del reshape_327 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_64 = paddle._C_ops.unsqueeze(reshape_328, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_65 = paddle._C_ops.unsqueeze(reshape_328, full_int_array_2) + del reshape_328 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_9 = paddle._C_ops.subtract(unsqueeze_64, unsqueeze_65) + del unsqueeze_64, unsqueeze_65 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_9 = paddle._C_ops.not_equal(subtract_9, full_26) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_18 = paddle._C_ops.where(not_equal_9, full_35, subtract_9) + del not_equal_9, subtract_9 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_9 = paddle._C_ops.equal(where_18, full_26) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_19 = paddle._C_ops.where(equal_9, full_36, where_18) + del equal_9, where_18 + + # pd_op.matmul: (256x49x1152xf32) <- (256x49x384xf32, 384x1152xf32) + matmul_97 = paddle._C_ops.matmul(reshape_168, parameter_64, False, False) + del parameter_64 + + # pd_op.add: (256x49x1152xf32) <- (256x49x1152xf32, 1152xf32) + add_133 = paddle._C_ops.add(matmul_97, parameter_63) + del parameter_63 + + # pd_op.reshape: (256x49x3x12x32xf32) <- (256x49x1152xf32, 5xi64) + reshape_329 = paddle._C_ops.reshape(add_133, full_int_array_59) + + # pd_op.transpose: (3x256x12x49x32xf32) <- (256x49x3x12x32xf32) + transpose_116 = paddle._C_ops.transpose(reshape_329, [2, 0, 3, 1, 4]) + del reshape_329 + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_62 = paddle._C_ops.slice( + transpose_116, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_63 = paddle._C_ops.slice( + transpose_116, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_19 = paddle._C_ops.slice( + transpose_116, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (256x12x49x32xf32) <- (256x12x49x32xf32, 1xf32) + scale_19 = paddle._C_ops.scale(slice_62, full_0, float("0"), True) + del slice_62 + + # pd_op.transpose: (256x12x32x49xf32) <- (256x12x49x32xf32) + transpose_117 = paddle._C_ops.transpose(slice_63, [0, 1, 3, 2]) + del slice_63 + + # pd_op.matmul: (256x12x49x49xf32) <- (256x12x49x32xf32, 256x12x32x49xf32) + matmul_98 = paddle._C_ops.matmul(scale_19, transpose_117, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_169 = paddle._C_ops.reshape(data_11, full_int_array_15) + del data_11 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_19 = paddle._C_ops.index_select(data_12, reshape_169, 0) + del data_12 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_330 = paddle._C_ops.reshape(index_select_19, full_int_array_16) + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_118 = paddle._C_ops.transpose(reshape_330, [2, 0, 1]) + del reshape_330 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_28 = paddle._C_ops.unsqueeze(transpose_118, full_int_array_0) + + # pd_op.add: (256x12x49x49xf32) <- (256x12x49x49xf32, 1x12x49x49xf32) + add_134 = paddle._C_ops.add(matmul_98, unsqueeze_28) + + # pd_op.reshape: (64x4x12x49x49xf32) <- (256x12x49x49xf32, 5xi64) + reshape_170 = paddle._C_ops.reshape(add_134, full_int_array_65) + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_66 = paddle._C_ops.unsqueeze(where_19, full_int_array_1) + del where_19 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_29 = paddle._C_ops.unsqueeze(unsqueeze_66, full_int_array_0) + del unsqueeze_66 + + # pd_op.add: (64x4x12x49x49xf32) <- (64x4x12x49x49xf32, 1x4x1x49x49xf32) + add_135 = paddle._C_ops.add(reshape_170, unsqueeze_29) + + # pd_op.reshape: (256x12x49x49xf32) <- (64x4x12x49x49xf32, 4xi64) + reshape_331 = paddle._C_ops.reshape(add_135, full_int_array_66) + + # pd_op.softmax: (256x12x49x49xf32) <- (256x12x49x49xf32) + softmax_19 = paddle._C_ops.softmax(reshape_331, -1) + del reshape_331 + + # pd_op.matmul: (256x12x49x32xf32) <- (256x12x49x49xf32, 256x12x49x32xf32) + matmul_143 = paddle._C_ops.matmul(softmax_19, slice_19, False, False) + + # pd_op.transpose: (256x49x12x32xf32) <- (256x12x49x32xf32) + transpose_119 = paddle._C_ops.transpose(matmul_143, [0, 2, 1, 3]) + del matmul_143 + + # pd_op.reshape: (256x49x384xf32) <- (256x49x12x32xf32, 3xi64) + reshape_171 = paddle._C_ops.reshape(transpose_119, full_int_array_60) + + # pd_op.matmul: (256x49x384xf32) <- (256x49x384xf32, 384x384xf32) + matmul_99 = paddle._C_ops.matmul(reshape_171, parameter_62, False, False) + del parameter_62 + + # pd_op.add: (256x49x384xf32) <- (256x49x384xf32, 384xf32) + add_136 = paddle._C_ops.add(matmul_99, parameter_61) + del parameter_61 + + # pd_op.reshape: (256x7x7x384xf32) <- (256x49x384xf32, 4xi64) + reshape_172 = paddle._C_ops.reshape(add_136, full_int_array_57) + + # pd_op.reshape: (64x2x2x7x7x384xf32) <- (256x7x7x384xf32, 6xi64) + reshape_332 = paddle._C_ops.reshape(reshape_172, full_int_array_61) + + # pd_op.transpose: (64x2x7x2x7x384xf32) <- (64x2x2x7x7x384xf32) + transpose_120 = paddle._C_ops.transpose(reshape_332, [0, 1, 3, 2, 4, 5]) + del reshape_332 + + # pd_op.reshape: (64x14x14x384xf32) <- (64x2x7x2x7x384xf32, 4xi64) + reshape_173 = paddle._C_ops.reshape(transpose_120, full_int_array_62) + + # pd_op.roll: (64x14x14x384xf32) <- (64x14x14x384xf32, 2xi64) + roll_19 = paddle._C_ops.roll(reshape_173, full_int_array_5, [1, 2]) + + # pd_op.reshape: (64x196x384xf32) <- (64x14x14x384xf32, 3xi64) + reshape_174 = paddle._C_ops.reshape(roll_19, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_20 = paddle._C_ops.full( + [], + float("0.752174"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_213 = full_20 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_36 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_216 = paddle._C_ops.add(full_20, uniform_36) + del uniform_36 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_36 = paddle._C_ops.floor(add_216) + del add_216 + + # pd_op.divide: (64x196x384xf32) <- (64x196x384xf32, xf32) + divide_36 = paddle._C_ops.divide(reshape_174, full_20) + + # pd_op.multiply: (64x196x384xf32) <- (64x196x384xf32, 64x1x1xf32) + multiply_36 = paddle._C_ops.multiply(divide_36, floor_36) + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 64x196x384xf32) + add_137 = paddle._C_ops.add(add_132, multiply_36) + + # pd_op.layer_norm: (64x196x384xf32, 64x196xf32, 64x196xf32) <- (64x196x384xf32, 384xf32, 384xf32) + layer_norm_126, layer_norm_127, layer_norm_128 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_137, parameter_60, parameter_59, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_59, parameter_60 + + # pd_op.matmul: (64x196x1536xf32) <- (64x196x384xf32, 384x1536xf32) + matmul_100 = paddle._C_ops.matmul(layer_norm_126, parameter_58, False, False) + del parameter_58 + + # pd_op.add: (64x196x1536xf32) <- (64x196x1536xf32, 1536xf32) + add_138 = paddle._C_ops.add(matmul_100, parameter_57) + del parameter_57 + + # pd_op.gelu: (64x196x1536xf32) <- (64x196x1536xf32) + gelu_19 = paddle._C_ops.gelu(add_138, False) + + # pd_op.matmul: (64x196x384xf32) <- (64x196x1536xf32, 1536x384xf32) + matmul_101 = paddle._C_ops.matmul(gelu_19, parameter_56, False, False) + del parameter_56 + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 384xf32) + add_139 = paddle._C_ops.add(matmul_101, parameter_55) + del parameter_55 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_37 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_217 = paddle._C_ops.add(full_20, uniform_37) + del uniform_37 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_37 = paddle._C_ops.floor(add_217) + del add_217 + + # pd_op.divide: (64x196x384xf32) <- (64x196x384xf32, xf32) + divide_37 = paddle._C_ops.divide(add_139, full_20) + + # pd_op.multiply: (64x196x384xf32) <- (64x196x384xf32, 64x1x1xf32) + multiply_37 = paddle._C_ops.multiply(divide_37, floor_37) + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 64x196x384xf32) + add_140 = paddle._C_ops.add(add_137, multiply_37) + + # pd_op.layer_norm: (64x196x384xf32, 64x196xf32, 64x196xf32) <- (64x196x384xf32, 384xf32, 384xf32) + layer_norm_129, layer_norm_130, layer_norm_131 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_140, parameter_54, parameter_53, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_53, parameter_54 + + # pd_op.reshape: (64x14x14x384xf32) <- (64x196x384xf32, 4xi64) + reshape_175 = paddle._C_ops.reshape(layer_norm_129, full_int_array_55) + + # pd_op.reshape: (64x2x7x2x7x384xf32) <- (64x14x14x384xf32, 6xi64) + reshape_333 = paddle._C_ops.reshape(reshape_175, full_int_array_56) + + # pd_op.transpose: (64x2x2x7x7x384xf32) <- (64x2x7x2x7x384xf32) + transpose_121 = paddle._C_ops.transpose(reshape_333, [0, 1, 3, 2, 4, 5]) + del reshape_333 + + # pd_op.reshape: (256x7x7x384xf32) <- (64x2x2x7x7x384xf32, 4xi64) + reshape_176 = paddle._C_ops.reshape(transpose_121, full_int_array_57) + + # pd_op.reshape: (256x49x384xf32) <- (256x7x7x384xf32, 3xi64) + reshape_177 = paddle._C_ops.reshape(reshape_176, full_int_array_58) + + # pd_op.matmul: (256x49x1152xf32) <- (256x49x384xf32, 384x1152xf32) + matmul_102 = paddle._C_ops.matmul(reshape_177, parameter_52, False, False) + del parameter_52 + + # pd_op.add: (256x49x1152xf32) <- (256x49x1152xf32, 1152xf32) + add_141 = paddle._C_ops.add(matmul_102, parameter_51) + del parameter_51 + + # pd_op.reshape: (256x49x3x12x32xf32) <- (256x49x1152xf32, 5xi64) + reshape_334 = paddle._C_ops.reshape(add_141, full_int_array_59) + + # pd_op.transpose: (3x256x12x49x32xf32) <- (256x49x3x12x32xf32) + transpose_122 = paddle._C_ops.transpose(reshape_334, [2, 0, 3, 1, 4]) + del reshape_334 + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_64 = paddle._C_ops.slice( + transpose_122, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_65 = paddle._C_ops.slice( + transpose_122, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_20 = paddle._C_ops.slice( + transpose_122, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (256x12x49x32xf32) <- (256x12x49x32xf32, 1xf32) + scale_20 = paddle._C_ops.scale(slice_64, full_0, float("0"), True) + del slice_64 + + # pd_op.transpose: (256x12x32x49xf32) <- (256x12x49x32xf32) + transpose_123 = paddle._C_ops.transpose(slice_65, [0, 1, 3, 2]) + del slice_65 + + # pd_op.matmul: (256x12x49x49xf32) <- (256x12x49x32xf32, 256x12x32x49xf32) + matmul_103 = paddle._C_ops.matmul(scale_20, transpose_123, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_178 = paddle._C_ops.reshape(data_13, full_int_array_15) + del data_13 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_20 = paddle._C_ops.index_select(data_14, reshape_178, 0) + del data_14 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_335 = paddle._C_ops.reshape(index_select_20, full_int_array_16) + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_124 = paddle._C_ops.transpose(reshape_335, [2, 0, 1]) + del reshape_335 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_30 = paddle._C_ops.unsqueeze(transpose_124, full_int_array_0) + + # pd_op.add: (256x12x49x49xf32) <- (256x12x49x49xf32, 1x12x49x49xf32) + add_218 = paddle._C_ops.add(matmul_103, unsqueeze_30) + + # pd_op.softmax: (256x12x49x49xf32) <- (256x12x49x49xf32) + softmax_20 = paddle._C_ops.softmax(add_218, -1) + del add_218 + + # pd_op.matmul: (256x12x49x32xf32) <- (256x12x49x49xf32, 256x12x49x32xf32) + matmul_144 = paddle._C_ops.matmul(softmax_20, slice_20, False, False) + + # pd_op.transpose: (256x49x12x32xf32) <- (256x12x49x32xf32) + transpose_125 = paddle._C_ops.transpose(matmul_144, [0, 2, 1, 3]) + del matmul_144 + + # pd_op.reshape: (256x49x384xf32) <- (256x49x12x32xf32, 3xi64) + reshape_179 = paddle._C_ops.reshape(transpose_125, full_int_array_60) + + # pd_op.matmul: (256x49x384xf32) <- (256x49x384xf32, 384x384xf32) + matmul_104 = paddle._C_ops.matmul(reshape_179, parameter_50, False, False) + del parameter_50 + + # pd_op.add: (256x49x384xf32) <- (256x49x384xf32, 384xf32) + add_142 = paddle._C_ops.add(matmul_104, parameter_49) + del parameter_49 + + # pd_op.reshape: (256x7x7x384xf32) <- (256x49x384xf32, 4xi64) + reshape_180 = paddle._C_ops.reshape(add_142, full_int_array_57) + + # pd_op.reshape: (64x2x2x7x7x384xf32) <- (256x7x7x384xf32, 6xi64) + reshape_336 = paddle._C_ops.reshape(reshape_180, full_int_array_61) + + # pd_op.transpose: (64x2x7x2x7x384xf32) <- (64x2x2x7x7x384xf32) + transpose_126 = paddle._C_ops.transpose(reshape_336, [0, 1, 3, 2, 4, 5]) + del reshape_336 + + # pd_op.reshape: (64x14x14x384xf32) <- (64x2x7x2x7x384xf32, 4xi64) + reshape_181 = paddle._C_ops.reshape(transpose_126, full_int_array_62) + + # pd_op.reshape: (64x196x384xf32) <- (64x14x14x384xf32, 3xi64) + reshape_182 = paddle._C_ops.reshape(reshape_181, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_21 = paddle._C_ops.full( + [], + float("0.73913"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_222 = full_21 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_38 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_219 = paddle._C_ops.add(full_21, uniform_38) + del uniform_38 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_38 = paddle._C_ops.floor(add_219) + del add_219 + + # pd_op.divide: (64x196x384xf32) <- (64x196x384xf32, xf32) + divide_38 = paddle._C_ops.divide(reshape_182, full_21) + + # pd_op.multiply: (64x196x384xf32) <- (64x196x384xf32, 64x1x1xf32) + multiply_38 = paddle._C_ops.multiply(divide_38, floor_38) + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 64x196x384xf32) + add_143 = paddle._C_ops.add(add_140, multiply_38) + + # pd_op.layer_norm: (64x196x384xf32, 64x196xf32, 64x196xf32) <- (64x196x384xf32, 384xf32, 384xf32) + layer_norm_132, layer_norm_133, layer_norm_134 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_143, parameter_48, parameter_47, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_47, parameter_48 + + # pd_op.matmul: (64x196x1536xf32) <- (64x196x384xf32, 384x1536xf32) + matmul_105 = paddle._C_ops.matmul(layer_norm_132, parameter_46, False, False) + del parameter_46 + + # pd_op.add: (64x196x1536xf32) <- (64x196x1536xf32, 1536xf32) + add_144 = paddle._C_ops.add(matmul_105, parameter_45) + del parameter_45 + + # pd_op.gelu: (64x196x1536xf32) <- (64x196x1536xf32) + gelu_20 = paddle._C_ops.gelu(add_144, False) + + # pd_op.matmul: (64x196x384xf32) <- (64x196x1536xf32, 1536x384xf32) + matmul_106 = paddle._C_ops.matmul(gelu_20, parameter_44, False, False) + del parameter_44 + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 384xf32) + add_145 = paddle._C_ops.add(matmul_106, parameter_43) + del parameter_43 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_39 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_220 = paddle._C_ops.add(full_21, uniform_39) + del uniform_39 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_39 = paddle._C_ops.floor(add_220) + del add_220 + + # pd_op.divide: (64x196x384xf32) <- (64x196x384xf32, xf32) + divide_39 = paddle._C_ops.divide(add_145, full_21) + + # pd_op.multiply: (64x196x384xf32) <- (64x196x384xf32, 64x1x1xf32) + multiply_39 = paddle._C_ops.multiply(divide_39, floor_39) + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 64x196x384xf32) + add_146 = paddle._C_ops.add(add_143, multiply_39) + + # pd_op.layer_norm: (64x196x384xf32, 64x196xf32, 64x196xf32) <- (64x196x384xf32, 384xf32, 384xf32) + layer_norm_135, layer_norm_136, layer_norm_137 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_146, parameter_42, parameter_41, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_41, parameter_42 + + # pd_op.reshape: (64x14x14x384xf32) <- (64x196x384xf32, 4xi64) + reshape_183 = paddle._C_ops.reshape(layer_norm_135, full_int_array_55) + + # pd_op.roll: (64x14x14x384xf32) <- (64x14x14x384xf32, 2xi64) + roll_20 = paddle._C_ops.roll(reshape_183, full_int_array_4, [1, 2]) + + # pd_op.reshape: (64x2x7x2x7x384xf32) <- (64x14x14x384xf32, 6xi64) + reshape_337 = paddle._C_ops.reshape(roll_20, full_int_array_56) + del full_int_array_56 + + # pd_op.transpose: (64x2x2x7x7x384xf32) <- (64x2x7x2x7x384xf32) + transpose_127 = paddle._C_ops.transpose(reshape_337, [0, 1, 3, 2, 4, 5]) + del reshape_337 + + # pd_op.reshape: (256x7x7x384xf32) <- (64x2x2x7x7x384xf32, 4xi64) + reshape_184 = paddle._C_ops.reshape(transpose_127, full_int_array_57) + + # pd_op.reshape: (256x49x384xf32) <- (256x7x7x384xf32, 3xi64) + reshape_185 = paddle._C_ops.reshape(reshape_184, full_int_array_58) + del full_int_array_58 + + # pd_op.full: (1x14x14x1xf32) <- () + full_44 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__92 = paddle._C_ops.set_value_( + full_44, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_44 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__93 = paddle._C_ops.set_value_( + set_value__92, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__92 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__94 = paddle._C_ops.set_value_( + set_value__93, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__93 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__95 = paddle._C_ops.set_value_( + set_value__94, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__94 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__96 = paddle._C_ops.set_value_( + set_value__95, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__95 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__97 = paddle._C_ops.set_value_( + set_value__96, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__96 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__98 = paddle._C_ops.set_value_( + set_value__97, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__97 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__99 = paddle._C_ops.set_value_( + set_value__98, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__98 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__10 = paddle._C_ops.set_value_( + set_value__99, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__99 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_338 = paddle._C_ops.reshape(set_value__10, full_int_array_64) + del full_int_array_64 + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_156 = paddle._C_ops.transpose(reshape_338, [0, 1, 3, 2, 4, 5]) + del reshape_338 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_339 = paddle._C_ops.reshape(transpose_156, full_int_array_36) + del transpose_156 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_340 = paddle._C_ops.reshape(reshape_339, full_int_array_37) + del reshape_339 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_67 = paddle._C_ops.unsqueeze(reshape_340, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_68 = paddle._C_ops.unsqueeze(reshape_340, full_int_array_2) + del reshape_340 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_10 = paddle._C_ops.subtract(unsqueeze_67, unsqueeze_68) + del unsqueeze_67, unsqueeze_68 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_10 = paddle._C_ops.not_equal(subtract_10, full_26) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_20 = paddle._C_ops.where(not_equal_10, full_35, subtract_10) + del full_35, not_equal_10, subtract_10 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_10 = paddle._C_ops.equal(where_20, full_26) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_21 = paddle._C_ops.where(equal_10, full_36, where_20) + del equal_10, full_36, where_20 + + # pd_op.matmul: (256x49x1152xf32) <- (256x49x384xf32, 384x1152xf32) + matmul_107 = paddle._C_ops.matmul(reshape_185, parameter_40, False, False) + del parameter_40 + + # pd_op.add: (256x49x1152xf32) <- (256x49x1152xf32, 1152xf32) + add_147 = paddle._C_ops.add(matmul_107, parameter_39) + del parameter_39 + + # pd_op.reshape: (256x49x3x12x32xf32) <- (256x49x1152xf32, 5xi64) + reshape_341 = paddle._C_ops.reshape(add_147, full_int_array_59) + del full_int_array_59 + + # pd_op.transpose: (3x256x12x49x32xf32) <- (256x49x3x12x32xf32) + transpose_128 = paddle._C_ops.transpose(reshape_341, [2, 0, 3, 1, 4]) + del reshape_341 + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_66 = paddle._C_ops.slice( + transpose_128, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_67 = paddle._C_ops.slice( + transpose_128, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_21 = paddle._C_ops.slice( + transpose_128, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (256x12x49x32xf32) <- (256x12x49x32xf32, 1xf32) + scale_21 = paddle._C_ops.scale(slice_66, full_0, float("0"), True) + del slice_66 + + # pd_op.transpose: (256x12x32x49xf32) <- (256x12x49x32xf32) + transpose_129 = paddle._C_ops.transpose(slice_67, [0, 1, 3, 2]) + del slice_67 + + # pd_op.matmul: (256x12x49x49xf32) <- (256x12x49x32xf32, 256x12x32x49xf32) + matmul_108 = paddle._C_ops.matmul(scale_21, transpose_129, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_186 = paddle._C_ops.reshape(data_15, full_int_array_15) + del data_15 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_21 = paddle._C_ops.index_select(data_16, reshape_186, 0) + del data_16 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_342 = paddle._C_ops.reshape(index_select_21, full_int_array_16) + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_130 = paddle._C_ops.transpose(reshape_342, [2, 0, 1]) + del reshape_342 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_31 = paddle._C_ops.unsqueeze(transpose_130, full_int_array_0) + + # pd_op.add: (256x12x49x49xf32) <- (256x12x49x49xf32, 1x12x49x49xf32) + add_148 = paddle._C_ops.add(matmul_108, unsqueeze_31) + + # pd_op.reshape: (64x4x12x49x49xf32) <- (256x12x49x49xf32, 5xi64) + reshape_187 = paddle._C_ops.reshape(add_148, full_int_array_65) + del full_int_array_65 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_69 = paddle._C_ops.unsqueeze(where_21, full_int_array_1) + del where_21 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_32 = paddle._C_ops.unsqueeze(unsqueeze_69, full_int_array_0) + del unsqueeze_69 + + # pd_op.add: (64x4x12x49x49xf32) <- (64x4x12x49x49xf32, 1x4x1x49x49xf32) + add_149 = paddle._C_ops.add(reshape_187, unsqueeze_32) + + # pd_op.reshape: (256x12x49x49xf32) <- (64x4x12x49x49xf32, 4xi64) + reshape_343 = paddle._C_ops.reshape(add_149, full_int_array_66) + del full_int_array_66 + + # pd_op.softmax: (256x12x49x49xf32) <- (256x12x49x49xf32) + softmax_21 = paddle._C_ops.softmax(reshape_343, -1) + del reshape_343 + + # pd_op.matmul: (256x12x49x32xf32) <- (256x12x49x49xf32, 256x12x49x32xf32) + matmul_145 = paddle._C_ops.matmul(softmax_21, slice_21, False, False) + + # pd_op.transpose: (256x49x12x32xf32) <- (256x12x49x32xf32) + transpose_131 = paddle._C_ops.transpose(matmul_145, [0, 2, 1, 3]) + del matmul_145 + + # pd_op.reshape: (256x49x384xf32) <- (256x49x12x32xf32, 3xi64) + reshape_188 = paddle._C_ops.reshape(transpose_131, full_int_array_60) + del full_int_array_60 + + # pd_op.matmul: (256x49x384xf32) <- (256x49x384xf32, 384x384xf32) + matmul_109 = paddle._C_ops.matmul(reshape_188, parameter_38, False, False) + del parameter_38 + + # pd_op.add: (256x49x384xf32) <- (256x49x384xf32, 384xf32) + add_150 = paddle._C_ops.add(matmul_109, parameter_37) + del parameter_37 + + # pd_op.reshape: (256x7x7x384xf32) <- (256x49x384xf32, 4xi64) + reshape_189 = paddle._C_ops.reshape(add_150, full_int_array_57) + del full_int_array_57 + + # pd_op.reshape: (64x2x2x7x7x384xf32) <- (256x7x7x384xf32, 6xi64) + reshape_344 = paddle._C_ops.reshape(reshape_189, full_int_array_61) + del full_int_array_61 + + # pd_op.transpose: (64x2x7x2x7x384xf32) <- (64x2x2x7x7x384xf32) + transpose_132 = paddle._C_ops.transpose(reshape_344, [0, 1, 3, 2, 4, 5]) + del reshape_344 + + # pd_op.reshape: (64x14x14x384xf32) <- (64x2x7x2x7x384xf32, 4xi64) + reshape_190 = paddle._C_ops.reshape(transpose_132, full_int_array_62) + del full_int_array_62 + + # pd_op.roll: (64x14x14x384xf32) <- (64x14x14x384xf32, 2xi64) + roll_21 = paddle._C_ops.roll(reshape_190, full_int_array_5, [1, 2]) + + # pd_op.reshape: (64x196x384xf32) <- (64x14x14x384xf32, 3xi64) + reshape_191 = paddle._C_ops.reshape(roll_21, full_int_array_63) + del full_int_array_63 + + # pd_op.full: (xf32) <- () + full_22 = paddle._C_ops.full( + [], + float("0.726087"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_233 = full_22 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_40 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_221 = paddle._C_ops.add(full_22, uniform_40) + del uniform_40 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_40 = paddle._C_ops.floor(add_221) + del add_221 + + # pd_op.divide: (64x196x384xf32) <- (64x196x384xf32, xf32) + divide_40 = paddle._C_ops.divide(reshape_191, full_22) + + # pd_op.multiply: (64x196x384xf32) <- (64x196x384xf32, 64x1x1xf32) + multiply_40 = paddle._C_ops.multiply(divide_40, floor_40) + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 64x196x384xf32) + add_151 = paddle._C_ops.add(add_146, multiply_40) + + # pd_op.layer_norm: (64x196x384xf32, 64x196xf32, 64x196xf32) <- (64x196x384xf32, 384xf32, 384xf32) + layer_norm_138, layer_norm_139, layer_norm_140 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_151, parameter_36, parameter_35, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_35, parameter_36 + + # pd_op.matmul: (64x196x1536xf32) <- (64x196x384xf32, 384x1536xf32) + matmul_110 = paddle._C_ops.matmul(layer_norm_138, parameter_34, False, False) + del parameter_34 + + # pd_op.add: (64x196x1536xf32) <- (64x196x1536xf32, 1536xf32) + add_152 = paddle._C_ops.add(matmul_110, parameter_33) + del parameter_33 + + # pd_op.gelu: (64x196x1536xf32) <- (64x196x1536xf32) + gelu_21 = paddle._C_ops.gelu(add_152, False) + + # pd_op.matmul: (64x196x384xf32) <- (64x196x1536xf32, 1536x384xf32) + matmul_111 = paddle._C_ops.matmul(gelu_21, parameter_32, False, False) + del parameter_32 + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 384xf32) + add_153 = paddle._C_ops.add(matmul_111, parameter_31) + del parameter_31 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_41 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_222 = paddle._C_ops.add(full_22, uniform_41) + del uniform_41 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_41 = paddle._C_ops.floor(add_222) + del add_222 + + # pd_op.divide: (64x196x384xf32) <- (64x196x384xf32, xf32) + divide_41 = paddle._C_ops.divide(add_153, full_22) + + # pd_op.multiply: (64x196x384xf32) <- (64x196x384xf32, 64x1x1xf32) + multiply_41 = paddle._C_ops.multiply(divide_41, floor_41) + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 64x196x384xf32) + add_154 = paddle._C_ops.add(add_151, multiply_41) + + # pd_op.reshape: (64x14x14x384xf32) <- (64x196x384xf32, 4xi64) + reshape_192 = paddle._C_ops.reshape(add_154, full_int_array_55) + + # pd_op.strided_slice: (64x7x7x384xf32) <- (64x14x14x384xf32, 2xi64, 2xi64, 2xi64) + strided_slice_8 = paddle._C_ops.strided_slice( + reshape_192, [1, 2], full_int_array_21, full_int_array_34, full_int_array_6 + ) + + # pd_op.strided_slice: (64x7x7x384xf32) <- (64x14x14x384xf32, 2xi64, 2xi64, 2xi64) + strided_slice_9 = paddle._C_ops.strided_slice( + reshape_192, [1, 2], full_int_array_7, full_int_array_34, full_int_array_6 + ) + + # pd_op.strided_slice: (64x7x7x384xf32) <- (64x14x14x384xf32, 2xi64, 2xi64, 2xi64) + strided_slice_10 = paddle._C_ops.strided_slice( + reshape_192, [1, 2], full_int_array_8, full_int_array_34, full_int_array_6 + ) + + # pd_op.strided_slice: (64x7x7x384xf32) <- (64x14x14x384xf32, 2xi64, 2xi64, 2xi64) + strided_slice_11 = paddle._C_ops.strided_slice( + reshape_192, [1, 2], full_int_array_23, full_int_array_34, full_int_array_6 + ) + + # pd_op.reshape: (64x14x14x384xf32) <- (64x14x14x384xf32, 4xi64) + reshape_345 = paddle._C_ops.reshape(reshape_192, full_int_array_55) + del full_int_array_55 + + # builtin.combine: ([64x7x7x384xf32, 64x7x7x384xf32, 64x7x7x384xf32, 64x7x7x384xf32]) <- (64x7x7x384xf32, 64x7x7x384xf32, 64x7x7x384xf32, 64x7x7x384xf32) + combine_2 = [ + strided_slice_8, + strided_slice_9, + strided_slice_10, + strided_slice_11, + ] + + # pd_op.concat: (64x7x7x1536xf32) <- ([64x7x7x384xf32, 64x7x7x384xf32, 64x7x7x384xf32, 64x7x7x384xf32], 1xi32) + concat_2 = paddle._C_ops.concat(combine_2, full_2) + del combine_2 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_67 = [64, -1, 1536] + + # pd_op.reshape: (64x49x1536xf32) <- (64x7x7x1536xf32, 3xi64) + reshape_193 = paddle._C_ops.reshape(concat_2, full_int_array_67) + del full_int_array_67 + + # pd_op.layer_norm: (64x49x1536xf32, 64x49xf32, 64x49xf32) <- (64x49x1536xf32, 1536xf32, 1536xf32) + layer_norm_141, layer_norm_142, layer_norm_143 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + reshape_193, parameter_30, parameter_29, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_29, parameter_30 + + # pd_op.matmul: (64x49x768xf32) <- (64x49x1536xf32, 1536x768xf32) + matmul_112 = paddle._C_ops.matmul(layer_norm_141, parameter_28, False, False) + del parameter_28 + + # pd_op.layer_norm: (64x49x768xf32, 64x49xf32, 64x49xf32) <- (64x49x768xf32, 768xf32, 768xf32) + layer_norm_144, layer_norm_145, layer_norm_146 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + matmul_112, parameter_27, parameter_26, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_26, parameter_27 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_68 = [64, 7, 7, 768] + + # pd_op.reshape: (64x7x7x768xf32) <- (64x49x768xf32, 4xi64) + reshape_194 = paddle._C_ops.reshape(layer_norm_144, full_int_array_68) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_69 = [64, 1, 7, 1, 7, 768] + + # pd_op.reshape: (64x1x7x1x7x768xf32) <- (64x7x7x768xf32, 6xi64) + reshape_346 = paddle._C_ops.reshape(reshape_194, full_int_array_69) + + # pd_op.transpose: (64x1x1x7x7x768xf32) <- (64x1x7x1x7x768xf32) + transpose_133 = paddle._C_ops.transpose(reshape_346, [0, 1, 3, 2, 4, 5]) + del reshape_346 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_70 = [-1, 7, 7, 768] + + # pd_op.reshape: (64x7x7x768xf32) <- (64x1x1x7x7x768xf32, 4xi64) + reshape_195 = paddle._C_ops.reshape(transpose_133, full_int_array_70) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_71 = [-1, 49, 768] + + # pd_op.reshape: (64x49x768xf32) <- (64x7x7x768xf32, 3xi64) + reshape_196 = paddle._C_ops.reshape(reshape_195, full_int_array_71) + + # pd_op.matmul: (64x49x2304xf32) <- (64x49x768xf32, 768x2304xf32) + matmul_113 = paddle._C_ops.matmul(reshape_196, parameter_25, False, False) + del parameter_25 + + # pd_op.add: (64x49x2304xf32) <- (64x49x2304xf32, 2304xf32) + add_155 = paddle._C_ops.add(matmul_113, parameter_24) + del parameter_24 + + # pd_op.full_int_array: (5xi64) <- () + full_int_array_72 = [64, 49, 3, 24, 32] + + # pd_op.reshape: (64x49x3x24x32xf32) <- (64x49x2304xf32, 5xi64) + reshape_347 = paddle._C_ops.reshape(add_155, full_int_array_72) + + # pd_op.transpose: (3x64x24x49x32xf32) <- (64x49x3x24x32xf32) + transpose_134 = paddle._C_ops.transpose(reshape_347, [2, 0, 3, 1, 4]) + del reshape_347 + + # pd_op.slice: (64x24x49x32xf32) <- (3x64x24x49x32xf32, 1xi64, 1xi64) + slice_68 = paddle._C_ops.slice( + transpose_134, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (64x24x49x32xf32) <- (3x64x24x49x32xf32, 1xi64, 1xi64) + slice_69 = paddle._C_ops.slice( + transpose_134, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (64x24x49x32xf32) <- (3x64x24x49x32xf32, 1xi64, 1xi64) + slice_22 = paddle._C_ops.slice( + transpose_134, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (64x24x49x32xf32) <- (64x24x49x32xf32, 1xf32) + scale_22 = paddle._C_ops.scale(slice_68, full_0, float("0"), True) + del slice_68 + + # pd_op.transpose: (64x24x32x49xf32) <- (64x24x49x32xf32) + transpose_135 = paddle._C_ops.transpose(slice_69, [0, 1, 3, 2]) + del slice_69 + + # pd_op.matmul: (64x24x49x49xf32) <- (64x24x49x32xf32, 64x24x32x49xf32) + matmul_114 = paddle._C_ops.matmul(scale_22, transpose_135, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_197 = paddle._C_ops.reshape(data_18, full_int_array_15) + del data_18 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_22 = paddle._C_ops.index_select(data_19, reshape_197, 0) + del data_19 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_348 = paddle._C_ops.reshape(index_select_22, full_int_array_16) + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_136 = paddle._C_ops.transpose(reshape_348, [2, 0, 1]) + del reshape_348 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_33 = paddle._C_ops.unsqueeze(transpose_136, full_int_array_0) + + # pd_op.add: (64x24x49x49xf32) <- (64x24x49x49xf32, 1x24x49x49xf32) + add_223 = paddle._C_ops.add(matmul_114, unsqueeze_33) + + # pd_op.softmax: (64x24x49x49xf32) <- (64x24x49x49xf32) + softmax_22 = paddle._C_ops.softmax(add_223, -1) + del add_223 + + # pd_op.matmul: (64x24x49x32xf32) <- (64x24x49x49xf32, 64x24x49x32xf32) + matmul_146 = paddle._C_ops.matmul(softmax_22, slice_22, False, False) + + # pd_op.transpose: (64x49x24x32xf32) <- (64x24x49x32xf32) + transpose_137 = paddle._C_ops.transpose(matmul_146, [0, 2, 1, 3]) + del matmul_146 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_73 = [64, 49, 768] + + # pd_op.reshape: (64x49x768xf32) <- (64x49x24x32xf32, 3xi64) + reshape_198 = paddle._C_ops.reshape(transpose_137, full_int_array_73) + + # pd_op.matmul: (64x49x768xf32) <- (64x49x768xf32, 768x768xf32) + matmul_115 = paddle._C_ops.matmul(reshape_198, parameter_23, False, False) + del parameter_23 + + # pd_op.add: (64x49x768xf32) <- (64x49x768xf32, 768xf32) + add_156 = paddle._C_ops.add(matmul_115, parameter_22) + del parameter_22 + + # pd_op.reshape: (64x7x7x768xf32) <- (64x49x768xf32, 4xi64) + reshape_199 = paddle._C_ops.reshape(add_156, full_int_array_70) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_74 = [-1, 1, 1, 7, 7, 768] + + # pd_op.reshape: (64x1x1x7x7x768xf32) <- (64x7x7x768xf32, 6xi64) + reshape_349 = paddle._C_ops.reshape(reshape_199, full_int_array_74) + + # pd_op.transpose: (64x1x7x1x7x768xf32) <- (64x1x1x7x7x768xf32) + transpose_138 = paddle._C_ops.transpose(reshape_349, [0, 1, 3, 2, 4, 5]) + del reshape_349 + + # pd_op.reshape: (64x7x7x768xf32) <- (64x1x7x1x7x768xf32, 4xi64) + reshape_200 = paddle._C_ops.reshape(transpose_138, full_int_array_70) + + # pd_op.reshape: (64x49x768xf32) <- (64x7x7x768xf32, 3xi64) + reshape_201 = paddle._C_ops.reshape(reshape_200, full_int_array_73) + + # pd_op.full: (xf32) <- () + full_23 = paddle._C_ops.full( + [], + float("0.713043"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_255 = full_23 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_42 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_224 = paddle._C_ops.add(full_23, uniform_42) + del uniform_42 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_42 = paddle._C_ops.floor(add_224) + del add_224 + + # pd_op.divide: (64x49x768xf32) <- (64x49x768xf32, xf32) + divide_42 = paddle._C_ops.divide(reshape_201, full_23) + + # pd_op.multiply: (64x49x768xf32) <- (64x49x768xf32, 64x1x1xf32) + multiply_42 = paddle._C_ops.multiply(divide_42, floor_42) + + # pd_op.add: (64x49x768xf32) <- (64x49x768xf32, 64x49x768xf32) + add_157 = paddle._C_ops.add(matmul_112, multiply_42) + + # pd_op.layer_norm: (64x49x768xf32, 64x49xf32, 64x49xf32) <- (64x49x768xf32, 768xf32, 768xf32) + layer_norm_147, layer_norm_148, layer_norm_149 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_157, parameter_21, parameter_20, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_20, parameter_21 + + # pd_op.matmul: (64x49x3072xf32) <- (64x49x768xf32, 768x3072xf32) + matmul_116 = paddle._C_ops.matmul(layer_norm_147, parameter_19, False, False) + del parameter_19 + + # pd_op.add: (64x49x3072xf32) <- (64x49x3072xf32, 3072xf32) + add_158 = paddle._C_ops.add(matmul_116, parameter_18) + del parameter_18 + + # pd_op.gelu: (64x49x3072xf32) <- (64x49x3072xf32) + gelu_22 = paddle._C_ops.gelu(add_158, False) + + # pd_op.matmul: (64x49x768xf32) <- (64x49x3072xf32, 3072x768xf32) + matmul_117 = paddle._C_ops.matmul(gelu_22, parameter_17, False, False) + del parameter_17 + + # pd_op.add: (64x49x768xf32) <- (64x49x768xf32, 768xf32) + add_159 = paddle._C_ops.add(matmul_117, parameter_16) + del parameter_16 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_43 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_225 = paddle._C_ops.add(full_23, uniform_43) + del uniform_43 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_43 = paddle._C_ops.floor(add_225) + del add_225 + + # pd_op.divide: (64x49x768xf32) <- (64x49x768xf32, xf32) + divide_43 = paddle._C_ops.divide(add_159, full_23) + + # pd_op.multiply: (64x49x768xf32) <- (64x49x768xf32, 64x1x1xf32) + multiply_43 = paddle._C_ops.multiply(divide_43, floor_43) + + # pd_op.add: (64x49x768xf32) <- (64x49x768xf32, 64x49x768xf32) + add_160 = paddle._C_ops.add(add_157, multiply_43) + + # pd_op.layer_norm: (64x49x768xf32, 64x49xf32, 64x49xf32) <- (64x49x768xf32, 768xf32, 768xf32) + layer_norm_150, layer_norm_151, layer_norm_152 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_160, parameter_15, parameter_14, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_14, parameter_15 + + # pd_op.reshape: (64x7x7x768xf32) <- (64x49x768xf32, 4xi64) + reshape_202 = paddle._C_ops.reshape(layer_norm_150, full_int_array_68) + del full_int_array_68 + + # pd_op.roll: (64x7x7x768xf32) <- (64x7x7x768xf32, 2xi64) + roll_22 = paddle._C_ops.roll(reshape_202, full_int_array_4, [1, 2]) + + # pd_op.reshape: (64x1x7x1x7x768xf32) <- (64x7x7x768xf32, 6xi64) + reshape_350 = paddle._C_ops.reshape(roll_22, full_int_array_69) + del full_int_array_69 + + # pd_op.transpose: (64x1x1x7x7x768xf32) <- (64x1x7x1x7x768xf32) + transpose_139 = paddle._C_ops.transpose(reshape_350, [0, 1, 3, 2, 4, 5]) + del reshape_350 + + # pd_op.reshape: (64x7x7x768xf32) <- (64x1x1x7x7x768xf32, 4xi64) + reshape_203 = paddle._C_ops.reshape(transpose_139, full_int_array_70) + + # pd_op.reshape: (64x49x768xf32) <- (64x7x7x768xf32, 3xi64) + reshape_204 = paddle._C_ops.reshape(reshape_203, full_int_array_71) + del full_int_array_71 + + # pd_op.full: (1x7x7x1xf32) <- () + full_45 = paddle._C_ops.full( + [1, 7, 7, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__100 = paddle._C_ops.set_value_( + full_45, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_45, full_int_array_21 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__101 = paddle._C_ops.set_value_( + set_value__100, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del full_int_array_24, set_value__100 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__102 = paddle._C_ops.set_value_( + set_value__101, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del full_int_array_26, full_int_array_27, set_value__101 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__103 = paddle._C_ops.set_value_( + set_value__102, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del full_int_array_28, set_value__102 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__104 = paddle._C_ops.set_value_( + set_value__103, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del full_int_array_22, set_value__103 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__105 = paddle._C_ops.set_value_( + set_value__104, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del full_int_array_25, full_int_array_30, set_value__104 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__106 = paddle._C_ops.set_value_( + set_value__105, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del full_int_array_31, full_int_array_32, set_value__105 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__107 = paddle._C_ops.set_value_( + set_value__106, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del full_int_array_29, full_int_array_33, set_value__106 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__11 = paddle._C_ops.set_value_( + set_value__107, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del full_int_array_34, set_value__107 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_75 = [1, 1, 7, 1, 7, 1] + + # pd_op.reshape: (1x1x7x1x7x1xf32) <- (1x7x7x1xf32, 6xi64) + reshape_351 = paddle._C_ops.reshape(set_value__11, full_int_array_75) + del full_int_array_75 + + # pd_op.transpose: (1x1x1x7x7x1xf32) <- (1x1x7x1x7x1xf32) + transpose_157 = paddle._C_ops.transpose(reshape_351, [0, 1, 3, 2, 4, 5]) + del reshape_351 + + # pd_op.reshape: (1x7x7x1xf32) <- (1x1x1x7x7x1xf32, 4xi64) + reshape_352 = paddle._C_ops.reshape(transpose_157, full_int_array_36) + del full_int_array_36, transpose_157 + + # pd_op.reshape: (1x49xf32) <- (1x7x7x1xf32, 2xi64) + reshape_353 = paddle._C_ops.reshape(reshape_352, full_int_array_37) + del full_int_array_37, reshape_352 + + # pd_op.unsqueeze: (1x1x49xf32) <- (1x49xf32, 1xi64) + unsqueeze_70 = paddle._C_ops.unsqueeze(reshape_353, full_int_array_1) + + # pd_op.unsqueeze: (1x49x1xf32) <- (1x49xf32, 1xi64) + unsqueeze_71 = paddle._C_ops.unsqueeze(reshape_353, full_int_array_2) + del reshape_353 + + # pd_op.subtract: (1x49x49xf32) <- (1x1x49xf32, 1x49x1xf32) + subtract_11 = paddle._C_ops.subtract(unsqueeze_70, unsqueeze_71) + del unsqueeze_70, unsqueeze_71 + + # pd_op.not_equal: (1x49x49xb) <- (1x49x49xf32, xf32) + not_equal_11 = paddle._C_ops.not_equal(subtract_11, full_26) + + # pd_op.full: (1x49x49xf32) <- () + full_46 = paddle._C_ops.full( + [1, 49, 49], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (1x49x49xf32) <- (1x49x49xb, 1x49x49xf32, 1x49x49xf32) + where_22 = paddle._C_ops.where(not_equal_11, full_46, subtract_11) + del full_46, not_equal_11, subtract_11 + + # pd_op.equal: (1x49x49xb) <- (1x49x49xf32, xf32) + equal_11 = paddle._C_ops.equal(where_22, full_26) + del full_26 + + # pd_op.full: (1x49x49xf32) <- () + full_47 = paddle._C_ops.full( + [1, 49, 49], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (1x49x49xf32) <- (1x49x49xb, 1x49x49xf32, 1x49x49xf32) + where_23 = paddle._C_ops.where(equal_11, full_47, where_22) + del equal_11, full_47, where_22 + + # pd_op.matmul: (64x49x2304xf32) <- (64x49x768xf32, 768x2304xf32) + matmul_118 = paddle._C_ops.matmul(reshape_204, parameter_13, False, False) + del parameter_13 + + # pd_op.add: (64x49x2304xf32) <- (64x49x2304xf32, 2304xf32) + add_161 = paddle._C_ops.add(matmul_118, parameter_12) + del parameter_12 + + # pd_op.reshape: (64x49x3x24x32xf32) <- (64x49x2304xf32, 5xi64) + reshape_354 = paddle._C_ops.reshape(add_161, full_int_array_72) + del full_int_array_72 + + # pd_op.transpose: (3x64x24x49x32xf32) <- (64x49x3x24x32xf32) + transpose_140 = paddle._C_ops.transpose(reshape_354, [2, 0, 3, 1, 4]) + del reshape_354 + + # pd_op.slice: (64x24x49x32xf32) <- (3x64x24x49x32xf32, 1xi64, 1xi64) + slice_70 = paddle._C_ops.slice( + transpose_140, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (64x24x49x32xf32) <- (3x64x24x49x32xf32, 1xi64, 1xi64) + slice_71 = paddle._C_ops.slice( + transpose_140, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (64x24x49x32xf32) <- (3x64x24x49x32xf32, 1xi64, 1xi64) + slice_23 = paddle._C_ops.slice( + transpose_140, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (64x24x49x32xf32) <- (64x24x49x32xf32, 1xf32) + scale_23 = paddle._C_ops.scale(slice_70, full_0, float("0"), True) + del slice_70 + + # pd_op.transpose: (64x24x32x49xf32) <- (64x24x49x32xf32) + transpose_141 = paddle._C_ops.transpose(slice_71, [0, 1, 3, 2]) + del slice_71 + + # pd_op.matmul: (64x24x49x49xf32) <- (64x24x49x32xf32, 64x24x32x49xf32) + matmul_119 = paddle._C_ops.matmul(scale_23, transpose_141, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_205 = paddle._C_ops.reshape(data_21, full_int_array_15) + del data_21, full_int_array_15 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_23 = paddle._C_ops.index_select(data_22, reshape_205, 0) + del data_22 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_355 = paddle._C_ops.reshape(index_select_23, full_int_array_16) + del full_int_array_16 + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_142 = paddle._C_ops.transpose(reshape_355, [2, 0, 1]) + del reshape_355 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_34 = paddle._C_ops.unsqueeze(transpose_142, full_int_array_0) + + # pd_op.add: (64x24x49x49xf32) <- (64x24x49x49xf32, 1x24x49x49xf32) + add_162 = paddle._C_ops.add(matmul_119, unsqueeze_34) + + # pd_op.full_int_array: (5xi64) <- () + full_int_array_76 = [64, 1, 24, 49, 49] + + # pd_op.reshape: (64x1x24x49x49xf32) <- (64x24x49x49xf32, 5xi64) + reshape_206 = paddle._C_ops.reshape(add_162, full_int_array_76) + del full_int_array_76 + + # pd_op.unsqueeze: (1x1x49x49xf32) <- (1x49x49xf32, 1xi64) + unsqueeze_72 = paddle._C_ops.unsqueeze(where_23, full_int_array_1) + del where_23 + + # pd_op.unsqueeze: (1x1x1x49x49xf32) <- (1x1x49x49xf32, 1xi64) + unsqueeze_35 = paddle._C_ops.unsqueeze(unsqueeze_72, full_int_array_0) + del unsqueeze_72 + + # pd_op.add: (64x1x24x49x49xf32) <- (64x1x24x49x49xf32, 1x1x1x49x49xf32) + add_163 = paddle._C_ops.add(reshape_206, unsqueeze_35) + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_77 = [64, 24, 49, 49] + + # pd_op.reshape: (64x24x49x49xf32) <- (64x1x24x49x49xf32, 4xi64) + reshape_356 = paddle._C_ops.reshape(add_163, full_int_array_77) + del full_int_array_77 + + # pd_op.softmax: (64x24x49x49xf32) <- (64x24x49x49xf32) + softmax_23 = paddle._C_ops.softmax(reshape_356, -1) + del reshape_356 + + # pd_op.matmul: (64x24x49x32xf32) <- (64x24x49x49xf32, 64x24x49x32xf32) + matmul_147 = paddle._C_ops.matmul(softmax_23, slice_23, False, False) + + # pd_op.transpose: (64x49x24x32xf32) <- (64x24x49x32xf32) + transpose_143 = paddle._C_ops.transpose(matmul_147, [0, 2, 1, 3]) + del matmul_147 + + # pd_op.reshape: (64x49x768xf32) <- (64x49x24x32xf32, 3xi64) + reshape_207 = paddle._C_ops.reshape(transpose_143, full_int_array_73) + + # pd_op.matmul: (64x49x768xf32) <- (64x49x768xf32, 768x768xf32) + matmul_120 = paddle._C_ops.matmul(reshape_207, parameter_11, False, False) + del parameter_11 + + # pd_op.add: (64x49x768xf32) <- (64x49x768xf32, 768xf32) + add_164 = paddle._C_ops.add(matmul_120, parameter_10) + del parameter_10 + + # pd_op.reshape: (64x7x7x768xf32) <- (64x49x768xf32, 4xi64) + reshape_208 = paddle._C_ops.reshape(add_164, full_int_array_70) + + # pd_op.reshape: (64x1x1x7x7x768xf32) <- (64x7x7x768xf32, 6xi64) + reshape_357 = paddle._C_ops.reshape(reshape_208, full_int_array_74) + del full_int_array_74 + + # pd_op.transpose: (64x1x7x1x7x768xf32) <- (64x1x1x7x7x768xf32) + transpose_144 = paddle._C_ops.transpose(reshape_357, [0, 1, 3, 2, 4, 5]) + del reshape_357 + + # pd_op.reshape: (64x7x7x768xf32) <- (64x1x7x1x7x768xf32, 4xi64) + reshape_209 = paddle._C_ops.reshape(transpose_144, full_int_array_70) + del full_int_array_70 + + # pd_op.roll: (64x7x7x768xf32) <- (64x7x7x768xf32, 2xi64) + roll_23 = paddle._C_ops.roll(reshape_209, full_int_array_5, [1, 2]) + + # pd_op.reshape: (64x49x768xf32) <- (64x7x7x768xf32, 3xi64) + reshape_210 = paddle._C_ops.reshape(roll_23, full_int_array_73) + del full_int_array_73 + + # pd_op.full: (xf32) <- () + full_24 = paddle._C_ops.full( + [], float("0.7"), paddle.float32, paddle.framework._current_expected_place() + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_266 = full_24 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_44 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_226 = paddle._C_ops.add(full_24, uniform_44) + del uniform_44 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_44 = paddle._C_ops.floor(add_226) + del add_226 + + # pd_op.divide: (64x49x768xf32) <- (64x49x768xf32, xf32) + divide_44 = paddle._C_ops.divide(reshape_210, full_24) + + # pd_op.multiply: (64x49x768xf32) <- (64x49x768xf32, 64x1x1xf32) + multiply_44 = paddle._C_ops.multiply(divide_44, floor_44) + + # pd_op.add: (64x49x768xf32) <- (64x49x768xf32, 64x49x768xf32) + add_165 = paddle._C_ops.add(add_160, multiply_44) + + # pd_op.layer_norm: (64x49x768xf32, 64x49xf32, 64x49xf32) <- (64x49x768xf32, 768xf32, 768xf32) + layer_norm_153, layer_norm_154, layer_norm_155 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_165, parameter_9, parameter_8, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_8, parameter_9 + + # pd_op.matmul: (64x49x3072xf32) <- (64x49x768xf32, 768x3072xf32) + matmul_121 = paddle._C_ops.matmul(layer_norm_153, parameter_7, False, False) + del parameter_7 + + # pd_op.add: (64x49x3072xf32) <- (64x49x3072xf32, 3072xf32) + add_166 = paddle._C_ops.add(matmul_121, parameter_6) + del parameter_6 + + # pd_op.gelu: (64x49x3072xf32) <- (64x49x3072xf32) + gelu_23 = paddle._C_ops.gelu(add_166, False) + + # pd_op.matmul: (64x49x768xf32) <- (64x49x3072xf32, 3072x768xf32) + matmul_122 = paddle._C_ops.matmul(gelu_23, parameter_5, False, False) + del parameter_5 + + # pd_op.add: (64x49x768xf32) <- (64x49x768xf32, 768xf32) + add_167 = paddle._C_ops.add(matmul_122, parameter_4) + del parameter_4 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_45 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_29, + full_30, + 0, + paddle.framework._current_expected_place(), + ) + del full_29, full_30, full_int_array_40 + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_227 = paddle._C_ops.add(full_24, uniform_45) + del uniform_45 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_45 = paddle._C_ops.floor(add_227) + del add_227 + + # pd_op.divide: (64x49x768xf32) <- (64x49x768xf32, xf32) + divide_45 = paddle._C_ops.divide(add_167, full_24) + + # pd_op.multiply: (64x49x768xf32) <- (64x49x768xf32, 64x1x1xf32) + multiply_45 = paddle._C_ops.multiply(divide_45, floor_45) + + # pd_op.add: (64x49x768xf32) <- (64x49x768xf32, 64x49x768xf32) + add_168 = paddle._C_ops.add(add_165, multiply_45) + + # pd_op.layer_norm: (64x49x768xf32, 64x49xf32, 64x49xf32) <- (64x49x768xf32, 768xf32, 768xf32) + layer_norm_158, layer_norm_156, layer_norm_157 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_168, parameter_3, parameter_2, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_2, parameter_3 + + # pd_op.transpose: (64x768x49xf32) <- (64x49x768xf32) + transpose_145 = paddle._C_ops.transpose(layer_norm_158, [0, 2, 1]) + del layer_norm_158 + + # pd_op.unsqueeze: (64x768x1x49xf32) <- (64x768x49xf32, 1xi64) + unsqueeze_36 = paddle._C_ops.unsqueeze(transpose_145, full_int_array_2) + + # pd_op.pool2d: (64x768x1x1xf32) <- (64x768x1x49xf32, 2xi64) + pool2d_0 = paddle._C_ops.pool2d( + unsqueeze_36, + full_int_array_23, + [1, 1], + [0, 0], + False, + True, + "NCHW", + "avg", + False, + True, + "EXPLICIT", + ) + del full_int_array_23 + + # pd_op.squeeze: (64x768x1xf32) <- (64x768x1x1xf32, 1xi64) + squeeze_0 = paddle._C_ops.squeeze(pool2d_0, full_int_array_2) + + # pd_op.flatten: (64x768xf32) <- (64x768x1xf32) + flatten_0 = paddle._C_ops.flatten(squeeze_0, 1, 2) + + # pd_op.matmul: (64x102xf32) <- (64x768xf32, 768x102xf32) + matmul_123 = paddle._C_ops.matmul(flatten_0, parameter_1, False, False) + del parameter_1 + + # pd_op.add: (64x102xf32) <- (64x102xf32, 102xf32) + add_169 = paddle._C_ops.add(matmul_123, parameter_0) + del ( + assign_0, + assign_1, + assign_10, + assign_101, + assign_103, + assign_104, + assign_105, + assign_106, + assign_107, + assign_108, + assign_109, + assign_111, + assign_112, + assign_114, + assign_115, + assign_116, + assign_117, + assign_118, + assign_119, + assign_12, + assign_121, + assign_123, + assign_124, + assign_125, + assign_126, + assign_127, + assign_128, + assign_129, + assign_13, + assign_131, + assign_132, + assign_134, + assign_135, + assign_136, + assign_137, + assign_138, + assign_139, + assign_14, + assign_141, + assign_143, + assign_144, + assign_145, + assign_146, + assign_147, + assign_148, + assign_149, + assign_15, + assign_151, + assign_152, + assign_154, + assign_155, + assign_156, + assign_157, + assign_158, + assign_159, + assign_16, + assign_161, + assign_163, + assign_164, + assign_165, + assign_166, + assign_167, + assign_168, + assign_169, + assign_17, + assign_171, + assign_172, + assign_174, + assign_175, + assign_176, + assign_177, + assign_178, + assign_179, + assign_18, + assign_181, + assign_183, + assign_184, + assign_185, + assign_186, + assign_187, + assign_188, + assign_189, + assign_19, + assign_191, + assign_192, + assign_194, + assign_195, + assign_196, + assign_197, + assign_198, + assign_199, + assign_2, + assign_20, + assign_201, + assign_203, + assign_204, + assign_205, + assign_206, + assign_207, + assign_208, + assign_209, + assign_21, + assign_211, + assign_212, + assign_214, + assign_215, + assign_216, + assign_217, + assign_218, + assign_219, + assign_22, + assign_221, + assign_223, + assign_224, + assign_225, + assign_226, + assign_227, + assign_228, + assign_229, + assign_23, + assign_231, + assign_232, + assign_234, + assign_235, + assign_236, + assign_237, + assign_238, + assign_239, + assign_24, + assign_240, + assign_241, + assign_242, + assign_243, + assign_244, + assign_245, + assign_247, + assign_248, + assign_249, + assign_25, + assign_250, + assign_251, + assign_252, + assign_254, + assign_256, + assign_257, + assign_258, + assign_259, + assign_26, + assign_260, + assign_261, + assign_262, + assign_264, + assign_265, + assign_267, + assign_268, + assign_269, + assign_28, + assign_3, + assign_30, + assign_31, + assign_32, + assign_33, + assign_34, + assign_35, + assign_36, + assign_38, + assign_39, + assign_4, + assign_41, + assign_42, + assign_43, + assign_44, + assign_45, + assign_46, + assign_47, + assign_48, + assign_49, + assign_5, + assign_50, + assign_51, + assign_52, + assign_54, + assign_55, + assign_56, + assign_57, + assign_58, + assign_59, + assign_6, + assign_61, + assign_63, + assign_64, + assign_65, + assign_66, + assign_67, + assign_68, + assign_69, + assign_7, + assign_71, + assign_72, + assign_74, + assign_75, + assign_76, + assign_77, + assign_78, + assign_79, + assign_8, + assign_81, + assign_83, + assign_84, + assign_85, + assign_86, + assign_87, + assign_88, + assign_89, + assign_91, + assign_92, + assign_94, + assign_95, + assign_96, + assign_97, + assign_98, + assign_99, + full_int_array_0, + full_int_array_1, + full_int_array_2, + full_int_array_3, + full_int_array_4, + full_int_array_5, + full_int_array_6, + full_int_array_7, + full_int_array_8, + parameter_0, + ) + + return ( + conv2d_0, + reshape_0, + add_0, + transpose_0, + layer_norm_0, + layer_norm_1, + layer_norm_2, + layer_norm_3, + layer_norm_4, + layer_norm_5, + reshape_1, + transpose_1, + reshape_2, + reshape_3, + matmul_0, + add_1, + transpose_2, + slice_0, + full_0, + scale_0, + transpose_3, + matmul_1, + reshape_4, + index_select_0, + transpose_4, + unsqueeze_0, + softmax_0, + transpose_5, + reshape_5, + matmul_2, + add_2, + reshape_6, + transpose_6, + reshape_7, + reshape_8, + add_3, + layer_norm_6, + layer_norm_7, + layer_norm_8, + matmul_3, + add_4, + gelu_0, + matmul_4, + add_5, + add_6, + layer_norm_9, + layer_norm_10, + layer_norm_11, + reshape_9, + roll_0, + transpose_7, + reshape_10, + reshape_11, + matmul_5, + add_7, + transpose_8, + slice_1, + assign_9, + scale_1, + transpose_9, + matmul_6, + reshape_12, + index_select_1, + transpose_10, + unsqueeze_1, + add_8, + reshape_13, + unsqueeze_2, + add_9, + softmax_1, + transpose_11, + reshape_14, + matmul_7, + add_10, + reshape_15, + transpose_12, + reshape_16, + roll_1, + reshape_17, + full_1, + floor_0, + divide_0, + multiply_0, + add_11, + layer_norm_12, + layer_norm_13, + layer_norm_14, + matmul_8, + add_12, + gelu_1, + matmul_9, + add_13, + assign_11, + floor_1, + divide_1, + multiply_1, + add_14, + reshape_18, + strided_slice_0, + strided_slice_1, + strided_slice_2, + strided_slice_3, + full_2, + concat_0, + reshape_19, + layer_norm_15, + layer_norm_16, + layer_norm_17, + matmul_10, + layer_norm_18, + layer_norm_19, + layer_norm_20, + reshape_20, + transpose_13, + reshape_21, + reshape_22, + matmul_11, + add_15, + transpose_14, + slice_2, + assign_27, + scale_2, + transpose_15, + matmul_12, + reshape_23, + index_select_2, + transpose_16, + unsqueeze_3, + softmax_2, + transpose_17, + reshape_24, + matmul_13, + add_16, + reshape_25, + transpose_18, + reshape_26, + reshape_27, + full_3, + floor_2, + divide_2, + multiply_2, + add_17, + layer_norm_21, + layer_norm_22, + layer_norm_23, + matmul_14, + add_18, + gelu_2, + matmul_15, + add_19, + assign_29, + floor_3, + divide_3, + multiply_3, + add_20, + layer_norm_24, + layer_norm_25, + layer_norm_26, + reshape_28, + roll_2, + transpose_19, + reshape_29, + reshape_30, + matmul_16, + add_21, + transpose_20, + slice_3, + assign_37, + scale_3, + transpose_21, + matmul_17, + reshape_31, + index_select_3, + transpose_22, + unsqueeze_4, + add_22, + reshape_32, + unsqueeze_5, + add_23, + softmax_3, + transpose_23, + reshape_33, + matmul_18, + add_24, + reshape_34, + transpose_24, + reshape_35, + roll_3, + reshape_36, + full_4, + floor_4, + divide_4, + multiply_4, + add_25, + layer_norm_27, + layer_norm_28, + layer_norm_29, + matmul_19, + add_26, + gelu_3, + matmul_20, + add_27, + assign_40, + floor_5, + divide_5, + multiply_5, + add_28, + reshape_37, + strided_slice_4, + strided_slice_5, + strided_slice_6, + strided_slice_7, + assign_53, + concat_1, + reshape_38, + layer_norm_30, + layer_norm_31, + layer_norm_32, + matmul_21, + layer_norm_33, + layer_norm_34, + layer_norm_35, + reshape_39, + transpose_25, + reshape_40, + reshape_41, + matmul_22, + add_29, + transpose_26, + slice_4, + assign_60, + scale_4, + transpose_27, + matmul_23, + reshape_42, + index_select_4, + transpose_28, + unsqueeze_6, + softmax_4, + transpose_29, + reshape_43, + matmul_24, + add_30, + reshape_44, + transpose_30, + reshape_45, + reshape_46, + full_5, + floor_6, + divide_6, + multiply_6, + add_31, + layer_norm_36, + layer_norm_37, + layer_norm_38, + matmul_25, + add_32, + gelu_4, + matmul_26, + add_33, + assign_62, + floor_7, + divide_7, + multiply_7, + add_34, + layer_norm_39, + layer_norm_40, + layer_norm_41, + reshape_47, + roll_4, + transpose_31, + reshape_48, + reshape_49, + matmul_27, + add_35, + transpose_32, + slice_5, + assign_70, + scale_5, + transpose_33, + matmul_28, + reshape_50, + index_select_5, + transpose_34, + unsqueeze_7, + add_36, + reshape_51, + unsqueeze_8, + add_37, + softmax_5, + transpose_35, + reshape_52, + matmul_29, + add_38, + reshape_53, + transpose_36, + reshape_54, + roll_5, + reshape_55, + full_6, + floor_8, + divide_8, + multiply_8, + add_39, + layer_norm_42, + layer_norm_43, + layer_norm_44, + matmul_30, + add_40, + gelu_5, + matmul_31, + add_41, + assign_73, + floor_9, + divide_9, + multiply_9, + add_42, + layer_norm_45, + layer_norm_46, + layer_norm_47, + reshape_56, + transpose_37, + reshape_57, + reshape_58, + matmul_32, + add_43, + transpose_38, + slice_6, + assign_80, + scale_6, + transpose_39, + matmul_33, + reshape_59, + index_select_6, + transpose_40, + unsqueeze_9, + softmax_6, + transpose_41, + reshape_60, + matmul_34, + add_44, + reshape_61, + transpose_42, + reshape_62, + reshape_63, + full_7, + floor_10, + divide_10, + multiply_10, + add_45, + layer_norm_48, + layer_norm_49, + layer_norm_50, + matmul_35, + add_46, + gelu_6, + matmul_36, + add_47, + assign_82, + floor_11, + divide_11, + multiply_11, + add_48, + layer_norm_51, + layer_norm_52, + layer_norm_53, + reshape_64, + roll_6, + transpose_43, + reshape_65, + reshape_66, + matmul_37, + add_49, + transpose_44, + slice_7, + assign_90, + scale_7, + transpose_45, + matmul_38, + reshape_67, + index_select_7, + transpose_46, + unsqueeze_10, + add_50, + reshape_68, + unsqueeze_11, + add_51, + softmax_7, + transpose_47, + reshape_69, + matmul_39, + add_52, + reshape_70, + transpose_48, + reshape_71, + roll_7, + reshape_72, + full_8, + floor_12, + divide_12, + multiply_12, + add_53, + layer_norm_54, + layer_norm_55, + layer_norm_56, + matmul_40, + add_54, + gelu_7, + matmul_41, + add_55, + assign_93, + floor_13, + divide_13, + multiply_13, + add_56, + layer_norm_57, + layer_norm_58, + layer_norm_59, + reshape_73, + transpose_49, + reshape_74, + reshape_75, + matmul_42, + add_57, + transpose_50, + slice_8, + assign_100, + scale_8, + transpose_51, + matmul_43, + reshape_76, + index_select_8, + transpose_52, + unsqueeze_12, + softmax_8, + transpose_53, + reshape_77, + matmul_44, + add_58, + reshape_78, + transpose_54, + reshape_79, + reshape_80, + full_9, + floor_14, + divide_14, + multiply_14, + add_59, + layer_norm_60, + layer_norm_61, + layer_norm_62, + matmul_45, + add_60, + gelu_8, + matmul_46, + add_61, + assign_102, + floor_15, + divide_15, + multiply_15, + add_62, + layer_norm_63, + layer_norm_64, + layer_norm_65, + reshape_81, + roll_8, + transpose_55, + reshape_82, + reshape_83, + matmul_47, + add_63, + transpose_56, + slice_9, + assign_110, + scale_9, + transpose_57, + matmul_48, + reshape_84, + index_select_9, + transpose_58, + unsqueeze_13, + add_64, + reshape_85, + unsqueeze_14, + add_65, + softmax_9, + transpose_59, + reshape_86, + matmul_49, + add_66, + reshape_87, + transpose_60, + reshape_88, + roll_9, + reshape_89, + full_10, + floor_16, + divide_16, + multiply_16, + add_67, + layer_norm_66, + layer_norm_67, + layer_norm_68, + matmul_50, + add_68, + gelu_9, + matmul_51, + add_69, + assign_113, + floor_17, + divide_17, + multiply_17, + add_70, + layer_norm_69, + layer_norm_70, + layer_norm_71, + reshape_90, + transpose_61, + reshape_91, + reshape_92, + matmul_52, + add_71, + transpose_62, + slice_10, + assign_120, + scale_10, + transpose_63, + matmul_53, + reshape_93, + index_select_10, + transpose_64, + unsqueeze_15, + softmax_10, + transpose_65, + reshape_94, + matmul_54, + add_72, + reshape_95, + transpose_66, + reshape_96, + reshape_97, + full_11, + floor_18, + divide_18, + multiply_18, + add_73, + layer_norm_72, + layer_norm_73, + layer_norm_74, + matmul_55, + add_74, + gelu_10, + matmul_56, + add_75, + assign_122, + floor_19, + divide_19, + multiply_19, + add_76, + layer_norm_75, + layer_norm_76, + layer_norm_77, + reshape_98, + roll_10, + transpose_67, + reshape_99, + reshape_100, + matmul_57, + add_77, + transpose_68, + slice_11, + assign_130, + scale_11, + transpose_69, + matmul_58, + reshape_101, + index_select_11, + transpose_70, + unsqueeze_16, + add_78, + reshape_102, + unsqueeze_17, + add_79, + softmax_11, + transpose_71, + reshape_103, + matmul_59, + add_80, + reshape_104, + transpose_72, + reshape_105, + roll_11, + reshape_106, + full_12, + floor_20, + divide_20, + multiply_20, + add_81, + layer_norm_78, + layer_norm_79, + layer_norm_80, + matmul_60, + add_82, + gelu_11, + matmul_61, + add_83, + assign_133, + floor_21, + divide_21, + multiply_21, + add_84, + layer_norm_81, + layer_norm_82, + layer_norm_83, + reshape_107, + transpose_73, + reshape_108, + reshape_109, + matmul_62, + add_85, + transpose_74, + slice_12, + assign_140, + scale_12, + transpose_75, + matmul_63, + reshape_110, + index_select_12, + transpose_76, + unsqueeze_18, + softmax_12, + transpose_77, + reshape_111, + matmul_64, + add_86, + reshape_112, + transpose_78, + reshape_113, + reshape_114, + full_13, + floor_22, + divide_22, + multiply_22, + add_87, + layer_norm_84, + layer_norm_85, + layer_norm_86, + matmul_65, + add_88, + gelu_12, + matmul_66, + add_89, + assign_142, + floor_23, + divide_23, + multiply_23, + add_90, + layer_norm_87, + layer_norm_88, + layer_norm_89, + reshape_115, + roll_12, + transpose_79, + reshape_116, + reshape_117, + matmul_67, + add_91, + transpose_80, + slice_13, + assign_150, + scale_13, + transpose_81, + matmul_68, + reshape_118, + index_select_13, + transpose_82, + unsqueeze_19, + add_92, + reshape_119, + unsqueeze_20, + add_93, + softmax_13, + transpose_83, + reshape_120, + matmul_69, + add_94, + reshape_121, + transpose_84, + reshape_122, + roll_13, + reshape_123, + full_14, + floor_24, + divide_24, + multiply_24, + add_95, + layer_norm_90, + layer_norm_91, + layer_norm_92, + matmul_70, + add_96, + gelu_13, + matmul_71, + add_97, + assign_153, + floor_25, + divide_25, + multiply_25, + add_98, + layer_norm_93, + layer_norm_94, + layer_norm_95, + reshape_124, + transpose_85, + reshape_125, + reshape_126, + matmul_72, + add_99, + transpose_86, + slice_14, + assign_160, + scale_14, + transpose_87, + matmul_73, + reshape_127, + index_select_14, + transpose_88, + unsqueeze_21, + softmax_14, + transpose_89, + reshape_128, + matmul_74, + add_100, + reshape_129, + transpose_90, + reshape_130, + reshape_131, + full_15, + floor_26, + divide_26, + multiply_26, + add_101, + layer_norm_96, + layer_norm_97, + layer_norm_98, + matmul_75, + add_102, + gelu_14, + matmul_76, + add_103, + assign_162, + floor_27, + divide_27, + multiply_27, + add_104, + layer_norm_99, + layer_norm_100, + layer_norm_101, + reshape_132, + roll_14, + transpose_91, + reshape_133, + reshape_134, + matmul_77, + add_105, + transpose_92, + slice_15, + assign_170, + scale_15, + transpose_93, + matmul_78, + reshape_135, + index_select_15, + transpose_94, + unsqueeze_22, + add_106, + reshape_136, + unsqueeze_23, + add_107, + softmax_15, + transpose_95, + reshape_137, + matmul_79, + add_108, + reshape_138, + transpose_96, + reshape_139, + roll_15, + reshape_140, + full_16, + floor_28, + divide_28, + multiply_28, + add_109, + layer_norm_102, + layer_norm_103, + layer_norm_104, + matmul_80, + add_110, + gelu_15, + matmul_81, + add_111, + assign_173, + floor_29, + divide_29, + multiply_29, + add_112, + layer_norm_105, + layer_norm_106, + layer_norm_107, + reshape_141, + transpose_97, + reshape_142, + reshape_143, + matmul_82, + add_113, + transpose_98, + slice_16, + assign_180, + scale_16, + transpose_99, + matmul_83, + reshape_144, + index_select_16, + transpose_100, + unsqueeze_24, + softmax_16, + transpose_101, + reshape_145, + matmul_84, + add_114, + reshape_146, + transpose_102, + reshape_147, + reshape_148, + full_17, + floor_30, + divide_30, + multiply_30, + add_115, + layer_norm_108, + layer_norm_109, + layer_norm_110, + matmul_85, + add_116, + gelu_16, + matmul_86, + add_117, + assign_182, + floor_31, + divide_31, + multiply_31, + add_118, + layer_norm_111, + layer_norm_112, + layer_norm_113, + reshape_149, + roll_16, + transpose_103, + reshape_150, + reshape_151, + matmul_87, + add_119, + transpose_104, + slice_17, + assign_190, + scale_17, + transpose_105, + matmul_88, + reshape_152, + index_select_17, + transpose_106, + unsqueeze_25, + add_120, + reshape_153, + unsqueeze_26, + add_121, + softmax_17, + transpose_107, + reshape_154, + matmul_89, + add_122, + reshape_155, + transpose_108, + reshape_156, + roll_17, + reshape_157, + full_18, + floor_32, + divide_32, + multiply_32, + add_123, + layer_norm_114, + layer_norm_115, + layer_norm_116, + matmul_90, + add_124, + gelu_17, + matmul_91, + add_125, + assign_193, + floor_33, + divide_33, + multiply_33, + add_126, + layer_norm_117, + layer_norm_118, + layer_norm_119, + reshape_158, + transpose_109, + reshape_159, + reshape_160, + matmul_92, + add_127, + transpose_110, + slice_18, + assign_200, + scale_18, + transpose_111, + matmul_93, + reshape_161, + index_select_18, + transpose_112, + unsqueeze_27, + softmax_18, + transpose_113, + reshape_162, + matmul_94, + add_128, + reshape_163, + transpose_114, + reshape_164, + reshape_165, + full_19, + floor_34, + divide_34, + multiply_34, + add_129, + layer_norm_120, + layer_norm_121, + layer_norm_122, + matmul_95, + add_130, + gelu_18, + matmul_96, + add_131, + assign_202, + floor_35, + divide_35, + multiply_35, + add_132, + layer_norm_123, + layer_norm_124, + layer_norm_125, + reshape_166, + roll_18, + transpose_115, + reshape_167, + reshape_168, + matmul_97, + add_133, + transpose_116, + slice_19, + assign_210, + scale_19, + transpose_117, + matmul_98, + reshape_169, + index_select_19, + transpose_118, + unsqueeze_28, + add_134, + reshape_170, + unsqueeze_29, + add_135, + softmax_19, + transpose_119, + reshape_171, + matmul_99, + add_136, + reshape_172, + transpose_120, + reshape_173, + roll_19, + reshape_174, + full_20, + floor_36, + divide_36, + multiply_36, + add_137, + layer_norm_126, + layer_norm_127, + layer_norm_128, + matmul_100, + add_138, + gelu_19, + matmul_101, + add_139, + assign_213, + floor_37, + divide_37, + multiply_37, + add_140, + layer_norm_129, + layer_norm_130, + layer_norm_131, + reshape_175, + transpose_121, + reshape_176, + reshape_177, + matmul_102, + add_141, + transpose_122, + slice_20, + assign_220, + scale_20, + transpose_123, + matmul_103, + reshape_178, + index_select_20, + transpose_124, + unsqueeze_30, + softmax_20, + transpose_125, + reshape_179, + matmul_104, + add_142, + reshape_180, + transpose_126, + reshape_181, + reshape_182, + full_21, + floor_38, + divide_38, + multiply_38, + add_143, + layer_norm_132, + layer_norm_133, + layer_norm_134, + matmul_105, + add_144, + gelu_20, + matmul_106, + add_145, + assign_222, + floor_39, + divide_39, + multiply_39, + add_146, + layer_norm_135, + layer_norm_136, + layer_norm_137, + reshape_183, + roll_20, + transpose_127, + reshape_184, + reshape_185, + matmul_107, + add_147, + transpose_128, + slice_21, + assign_230, + scale_21, + transpose_129, + matmul_108, + reshape_186, + index_select_21, + transpose_130, + unsqueeze_31, + add_148, + reshape_187, + unsqueeze_32, + add_149, + softmax_21, + transpose_131, + reshape_188, + matmul_109, + add_150, + reshape_189, + transpose_132, + reshape_190, + roll_21, + reshape_191, + full_22, + floor_40, + divide_40, + multiply_40, + add_151, + layer_norm_138, + layer_norm_139, + layer_norm_140, + matmul_110, + add_152, + gelu_21, + matmul_111, + add_153, + assign_233, + floor_41, + divide_41, + multiply_41, + add_154, + reshape_192, + strided_slice_8, + strided_slice_9, + strided_slice_10, + strided_slice_11, + assign_246, + concat_2, + reshape_193, + layer_norm_141, + layer_norm_142, + layer_norm_143, + matmul_112, + layer_norm_144, + layer_norm_145, + layer_norm_146, + reshape_194, + transpose_133, + reshape_195, + reshape_196, + matmul_113, + add_155, + transpose_134, + slice_22, + assign_253, + scale_22, + transpose_135, + matmul_114, + reshape_197, + index_select_22, + transpose_136, + unsqueeze_33, + softmax_22, + transpose_137, + reshape_198, + matmul_115, + add_156, + reshape_199, + transpose_138, + reshape_200, + reshape_201, + full_23, + floor_42, + divide_42, + multiply_42, + add_157, + layer_norm_147, + layer_norm_148, + layer_norm_149, + matmul_116, + add_158, + gelu_22, + matmul_117, + add_159, + assign_255, + floor_43, + divide_43, + multiply_43, + add_160, + layer_norm_150, + layer_norm_151, + layer_norm_152, + reshape_202, + roll_22, + transpose_139, + reshape_203, + reshape_204, + matmul_118, + add_161, + transpose_140, + slice_23, + assign_263, + scale_23, + transpose_141, + matmul_119, + reshape_205, + index_select_23, + transpose_142, + unsqueeze_34, + add_162, + reshape_206, + unsqueeze_35, + add_163, + softmax_23, + transpose_143, + reshape_207, + matmul_120, + add_164, + reshape_208, + transpose_144, + reshape_209, + roll_23, + reshape_210, + full_24, + floor_44, + divide_44, + multiply_44, + add_165, + layer_norm_153, + layer_norm_154, + layer_norm_155, + matmul_121, + add_166, + gelu_23, + matmul_122, + add_167, + assign_266, + floor_45, + divide_45, + multiply_45, + add_168, + layer_norm_156, + layer_norm_157, + transpose_145, + unsqueeze_36, + pool2d_0, + squeeze_0, + flatten_0, + matmul_123, + add_169, + set_value__0, + set_value__1, + set_value__2, + set_value__3, + set_value__4, + set_value__5, + set_value__6, + set_value__7, + set_value__8, + set_value__9, + set_value__10, + set_value__11, + ) diff --git a/paddle_samples/vision-model/SwinTransformer_small_patch4_window7_224/subgraph_0/weight_meta.py b/paddle_samples/vision-model/SwinTransformer_small_patch4_window7_224/subgraph_0/weight_meta.py new file mode 100644 index 00000000..547d11be --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_small_patch4_window7_224/subgraph_0/weight_meta.py @@ -0,0 +1,2743 @@ +class Program_weight_tensor_parameter_0: + name = "parameter_0" + shape = [102] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_1: + name = "parameter_1" + shape = [768, 102] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_2: + name = "parameter_2" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_3: + name = "parameter_3" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_4: + name = "parameter_4" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_5: + name = "parameter_5" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_6: + name = "parameter_6" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_7: + name = "parameter_7" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_8: + name = "parameter_8" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_9: + name = "parameter_9" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_10: + name = "parameter_10" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_11: + name = "parameter_11" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_12: + name = "parameter_12" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_13: + name = "parameter_13" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_14: + name = "parameter_14" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_15: + name = "parameter_15" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_16: + name = "parameter_16" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_17: + name = "parameter_17" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_18: + name = "parameter_18" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_19: + name = "parameter_19" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_20: + name = "parameter_20" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_21: + name = "parameter_21" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_22: + name = "parameter_22" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_23: + name = "parameter_23" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_24: + name = "parameter_24" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_25: + name = "parameter_25" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_26: + name = "parameter_26" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_27: + name = "parameter_27" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_28: + name = "parameter_28" + shape = [1536, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_29: + name = "parameter_29" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_30: + name = "parameter_30" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_31: + name = "parameter_31" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_32: + name = "parameter_32" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_33: + name = "parameter_33" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_34: + name = "parameter_34" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_35: + name = "parameter_35" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_36: + name = "parameter_36" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_37: + name = "parameter_37" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_38: + name = "parameter_38" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_39: + name = "parameter_39" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_40: + name = "parameter_40" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_41: + name = "parameter_41" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_42: + name = "parameter_42" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_43: + name = "parameter_43" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_44: + name = "parameter_44" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_45: + name = "parameter_45" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_46: + name = "parameter_46" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_47: + name = "parameter_47" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_48: + name = "parameter_48" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_49: + name = "parameter_49" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_50: + name = "parameter_50" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_51: + name = "parameter_51" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_52: + name = "parameter_52" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_53: + name = "parameter_53" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_54: + name = "parameter_54" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_55: + name = "parameter_55" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_56: + name = "parameter_56" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_57: + name = "parameter_57" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_58: + name = "parameter_58" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_59: + name = "parameter_59" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_60: + name = "parameter_60" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_61: + name = "parameter_61" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_62: + name = "parameter_62" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_63: + name = "parameter_63" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_64: + name = "parameter_64" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_65: + name = "parameter_65" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_66: + name = "parameter_66" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_67: + name = "parameter_67" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_68: + name = "parameter_68" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_69: + name = "parameter_69" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_70: + name = "parameter_70" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_71: + name = "parameter_71" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_72: + name = "parameter_72" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_73: + name = "parameter_73" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_74: + name = "parameter_74" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_75: + name = "parameter_75" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_76: + name = "parameter_76" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_77: + name = "parameter_77" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_78: + name = "parameter_78" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_79: + name = "parameter_79" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_80: + name = "parameter_80" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_81: + name = "parameter_81" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_82: + name = "parameter_82" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_83: + name = "parameter_83" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_84: + name = "parameter_84" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_85: + name = "parameter_85" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_86: + name = "parameter_86" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_87: + name = "parameter_87" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_88: + name = "parameter_88" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_89: + name = "parameter_89" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_90: + name = "parameter_90" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_91: + name = "parameter_91" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_92: + name = "parameter_92" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_93: + name = "parameter_93" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_94: + name = "parameter_94" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_95: + name = "parameter_95" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_96: + name = "parameter_96" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_97: + name = "parameter_97" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_98: + name = "parameter_98" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_99: + name = "parameter_99" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_100: + name = "parameter_100" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_101: + name = "parameter_101" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_102: + name = "parameter_102" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_103: + name = "parameter_103" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_104: + name = "parameter_104" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_105: + name = "parameter_105" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_106: + name = "parameter_106" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_107: + name = "parameter_107" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_108: + name = "parameter_108" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_109: + name = "parameter_109" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_110: + name = "parameter_110" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_111: + name = "parameter_111" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_112: + name = "parameter_112" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_113: + name = "parameter_113" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_114: + name = "parameter_114" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_115: + name = "parameter_115" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_116: + name = "parameter_116" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_117: + name = "parameter_117" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_118: + name = "parameter_118" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_119: + name = "parameter_119" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_120: + name = "parameter_120" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_121: + name = "parameter_121" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_122: + name = "parameter_122" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_123: + name = "parameter_123" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_124: + name = "parameter_124" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_125: + name = "parameter_125" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_126: + name = "parameter_126" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_127: + name = "parameter_127" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_128: + name = "parameter_128" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_129: + name = "parameter_129" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_130: + name = "parameter_130" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_131: + name = "parameter_131" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_132: + name = "parameter_132" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_133: + name = "parameter_133" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_134: + name = "parameter_134" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_135: + name = "parameter_135" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_136: + name = "parameter_136" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_137: + name = "parameter_137" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_138: + name = "parameter_138" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_139: + name = "parameter_139" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_140: + name = "parameter_140" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_141: + name = "parameter_141" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_142: + name = "parameter_142" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_143: + name = "parameter_143" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_144: + name = "parameter_144" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_145: + name = "parameter_145" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_146: + name = "parameter_146" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_147: + name = "parameter_147" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_148: + name = "parameter_148" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_149: + name = "parameter_149" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_150: + name = "parameter_150" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_151: + name = "parameter_151" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_152: + name = "parameter_152" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_153: + name = "parameter_153" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_154: + name = "parameter_154" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_155: + name = "parameter_155" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_156: + name = "parameter_156" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_157: + name = "parameter_157" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_158: + name = "parameter_158" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_159: + name = "parameter_159" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_160: + name = "parameter_160" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_161: + name = "parameter_161" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_162: + name = "parameter_162" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_163: + name = "parameter_163" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_164: + name = "parameter_164" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_165: + name = "parameter_165" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_166: + name = "parameter_166" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_167: + name = "parameter_167" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_168: + name = "parameter_168" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_169: + name = "parameter_169" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_170: + name = "parameter_170" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_171: + name = "parameter_171" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_172: + name = "parameter_172" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_173: + name = "parameter_173" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_174: + name = "parameter_174" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_175: + name = "parameter_175" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_176: + name = "parameter_176" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_177: + name = "parameter_177" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_178: + name = "parameter_178" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_179: + name = "parameter_179" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_180: + name = "parameter_180" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_181: + name = "parameter_181" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_182: + name = "parameter_182" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_183: + name = "parameter_183" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_184: + name = "parameter_184" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_185: + name = "parameter_185" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_186: + name = "parameter_186" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_187: + name = "parameter_187" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_188: + name = "parameter_188" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_189: + name = "parameter_189" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_190: + name = "parameter_190" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_191: + name = "parameter_191" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_192: + name = "parameter_192" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_193: + name = "parameter_193" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_194: + name = "parameter_194" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_195: + name = "parameter_195" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_196: + name = "parameter_196" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_197: + name = "parameter_197" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_198: + name = "parameter_198" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_199: + name = "parameter_199" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_200: + name = "parameter_200" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_201: + name = "parameter_201" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_202: + name = "parameter_202" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_203: + name = "parameter_203" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_204: + name = "parameter_204" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_205: + name = "parameter_205" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_206: + name = "parameter_206" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_207: + name = "parameter_207" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_208: + name = "parameter_208" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_209: + name = "parameter_209" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_210: + name = "parameter_210" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_211: + name = "parameter_211" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_212: + name = "parameter_212" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_213: + name = "parameter_213" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_214: + name = "parameter_214" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_215: + name = "parameter_215" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_216: + name = "parameter_216" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_217: + name = "parameter_217" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_218: + name = "parameter_218" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_219: + name = "parameter_219" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_220: + name = "parameter_220" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_221: + name = "parameter_221" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_222: + name = "parameter_222" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_223: + name = "parameter_223" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_224: + name = "parameter_224" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_225: + name = "parameter_225" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_226: + name = "parameter_226" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_227: + name = "parameter_227" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_228: + name = "parameter_228" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_229: + name = "parameter_229" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_230: + name = "parameter_230" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_231: + name = "parameter_231" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_232: + name = "parameter_232" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_233: + name = "parameter_233" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_234: + name = "parameter_234" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_235: + name = "parameter_235" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_236: + name = "parameter_236" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_237: + name = "parameter_237" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_238: + name = "parameter_238" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_239: + name = "parameter_239" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_240: + name = "parameter_240" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_241: + name = "parameter_241" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_242: + name = "parameter_242" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_243: + name = "parameter_243" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_244: + name = "parameter_244" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_245: + name = "parameter_245" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_246: + name = "parameter_246" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_247: + name = "parameter_247" + shape = [768, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_248: + name = "parameter_248" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_249: + name = "parameter_249" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_250: + name = "parameter_250" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_251: + name = "parameter_251" + shape = [768, 192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_252: + name = "parameter_252" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_253: + name = "parameter_253" + shape = [192, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_254: + name = "parameter_254" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_255: + name = "parameter_255" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_256: + name = "parameter_256" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_257: + name = "parameter_257" + shape = [192, 192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_258: + name = "parameter_258" + shape = [576] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_259: + name = "parameter_259" + shape = [192, 576] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_260: + name = "parameter_260" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_261: + name = "parameter_261" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_262: + name = "parameter_262" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_263: + name = "parameter_263" + shape = [768, 192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_264: + name = "parameter_264" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_265: + name = "parameter_265" + shape = [192, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_266: + name = "parameter_266" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_267: + name = "parameter_267" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_268: + name = "parameter_268" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_269: + name = "parameter_269" + shape = [192, 192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_270: + name = "parameter_270" + shape = [576] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_271: + name = "parameter_271" + shape = [192, 576] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_272: + name = "parameter_272" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_273: + name = "parameter_273" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_274: + name = "parameter_274" + shape = [384, 192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_275: + name = "parameter_275" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_276: + name = "parameter_276" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_277: + name = "parameter_277" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_278: + name = "parameter_278" + shape = [384, 96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_279: + name = "parameter_279" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_280: + name = "parameter_280" + shape = [96, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_281: + name = "parameter_281" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_282: + name = "parameter_282" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_283: + name = "parameter_283" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_284: + name = "parameter_284" + shape = [96, 96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_285: + name = "parameter_285" + shape = [288] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_286: + name = "parameter_286" + shape = [96, 288] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_287: + name = "parameter_287" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_288: + name = "parameter_288" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_289: + name = "parameter_289" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_290: + name = "parameter_290" + shape = [384, 96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_291: + name = "parameter_291" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_292: + name = "parameter_292" + shape = [96, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_293: + name = "parameter_293" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_294: + name = "parameter_294" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_295: + name = "parameter_295" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_296: + name = "parameter_296" + shape = [96, 96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_297: + name = "parameter_297" + shape = [288] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_298: + name = "parameter_298" + shape = [96, 288] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_299: + name = "parameter_299" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_300: + name = "parameter_300" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_301: + name = "parameter_301" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_302: + name = "parameter_302" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_303: + name = "parameter_303" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_304: + name = "parameter_304" + shape = [96, 3, 4, 4] + dtype = "float32" + low = 0 + high = 0.5 + data = None diff --git a/paddle_samples/vision-model/SwinTransformer_small_patch4_window7_224/subgraph_1/graph_hash.txt b/paddle_samples/vision-model/SwinTransformer_small_patch4_window7_224/subgraph_1/graph_hash.txt new file mode 100644 index 00000000..678a18a1 --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_small_patch4_window7_224/subgraph_1/graph_hash.txt @@ -0,0 +1 @@ +b30ca9883649d991bc8c4ba04da935a9b4b73a36613bcc8529ef9a0a2fb8b1d3 \ No newline at end of file diff --git a/paddle_samples/vision-model/SwinTransformer_small_patch4_window7_224/subgraph_1/graph_net.json b/paddle_samples/vision-model/SwinTransformer_small_patch4_window7_224/subgraph_1/graph_net.json new file mode 100644 index 00000000..368ac6fc --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_small_patch4_window7_224/subgraph_1/graph_net.json @@ -0,0 +1,5 @@ +{ + "framework": "paddle", + "num_devices_required": 1, + "num_nodes_required": 1 +} \ No newline at end of file diff --git a/paddle_samples/vision-model/SwinTransformer_small_patch4_window7_224/subgraph_1/input_meta.py b/paddle_samples/vision-model/SwinTransformer_small_patch4_window7_224/subgraph_1/input_meta.py new file mode 100644 index 00000000..ab7b5204 --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_small_patch4_window7_224/subgraph_1/input_meta.py @@ -0,0 +1,439 @@ +class Program_weight_tensor_data_0: + name = "data_0" + shape = [128, 3, 224, 224] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_1: + name = "data_1" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_2: + name = "data_2" + shape = [169, 3] + dtype = "float32" + low = -10.8571 + high = 3.3087 + data = None + + +class Program_weight_tensor_data_3: + name = "data_3" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_4: + name = "data_4" + shape = [169, 3] + dtype = "float32" + low = -10.7209 + high = 3.22038 + data = None + + +class Program_weight_tensor_data_5: + name = "data_5" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_6: + name = "data_6" + shape = [169, 6] + dtype = "float32" + low = -4.5894 + high = 7.8887 + data = None + + +class Program_weight_tensor_data_7: + name = "data_7" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_8: + name = "data_8" + shape = [169, 6] + dtype = "float32" + low = -4.75622 + high = 5.78443 + data = None + + +class Program_weight_tensor_data_9: + name = "data_9" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_10: + name = "data_10" + shape = [169, 12] + dtype = "float32" + low = -8.44948 + high = 4.78312 + data = None + + +class Program_weight_tensor_data_11: + name = "data_11" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_12: + name = "data_12" + shape = [169, 12] + dtype = "float32" + low = -11.168 + high = 10.3585 + data = None + + +class Program_weight_tensor_data_13: + name = "data_13" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_14: + name = "data_14" + shape = [169, 12] + dtype = "float32" + low = -9.90825 + high = 4.3354 + data = None + + +class Program_weight_tensor_data_15: + name = "data_15" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_16: + name = "data_16" + shape = [169, 12] + dtype = "float32" + low = -8.1845 + high = 3.88734 + data = None + + +class Program_weight_tensor_data_17: + name = "data_17" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_18: + name = "data_18" + shape = [169, 12] + dtype = "float32" + low = -5.03745 + high = 3.21106 + data = None + + +class Program_weight_tensor_data_19: + name = "data_19" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_20: + name = "data_20" + shape = [169, 12] + dtype = "float32" + low = -6.80933 + high = 4.11838 + data = None + + +class Program_weight_tensor_data_21: + name = "data_21" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_22: + name = "data_22" + shape = [169, 12] + dtype = "float32" + low = -10.1308 + high = 4.64291 + data = None + + +class Program_weight_tensor_data_23: + name = "data_23" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_24: + name = "data_24" + shape = [169, 12] + dtype = "float32" + low = -5.80355 + high = 3.56498 + data = None + + +class Program_weight_tensor_data_25: + name = "data_25" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_26: + name = "data_26" + shape = [169, 12] + dtype = "float32" + low = -10.4223 + high = 2.90399 + data = None + + +class Program_weight_tensor_data_27: + name = "data_27" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_28: + name = "data_28" + shape = [169, 12] + dtype = "float32" + low = -8.23529 + high = 3.16362 + data = None + + +class Program_weight_tensor_data_29: + name = "data_29" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_30: + name = "data_30" + shape = [169, 12] + dtype = "float32" + low = -9.31919 + high = 3.19278 + data = None + + +class Program_weight_tensor_data_31: + name = "data_31" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_32: + name = "data_32" + shape = [169, 12] + dtype = "float32" + low = -4.7195 + high = 3.61123 + data = None + + +class Program_weight_tensor_data_33: + name = "data_33" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_34: + name = "data_34" + shape = [169, 12] + dtype = "float32" + low = -9.57186 + high = 2.4857 + data = None + + +class Program_weight_tensor_data_35: + name = "data_35" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_36: + name = "data_36" + shape = [169, 12] + dtype = "float32" + low = -5.86019 + high = 2.54119 + data = None + + +class Program_weight_tensor_data_37: + name = "data_37" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_38: + name = "data_38" + shape = [169, 12] + dtype = "float32" + low = -11.2032 + high = 1.84057 + data = None + + +class Program_weight_tensor_data_39: + name = "data_39" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_40: + name = "data_40" + shape = [169, 12] + dtype = "float32" + low = -11.4153 + high = 2.566 + data = None + + +class Program_weight_tensor_data_41: + name = "data_41" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_42: + name = "data_42" + shape = [169, 12] + dtype = "float32" + low = -12.6668 + high = 1.54754 + data = None + + +class Program_weight_tensor_data_43: + name = "data_43" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_44: + name = "data_44" + shape = [169, 12] + dtype = "float32" + low = -6.02028 + high = 2.94477 + data = None + + +class Program_weight_tensor_data_45: + name = "data_45" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_46: + name = "data_46" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_47: + name = "data_47" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_48: + name = "data_48" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None diff --git a/paddle_samples/vision-model/SwinTransformer_small_patch4_window7_224/subgraph_1/model.py b/paddle_samples/vision-model/SwinTransformer_small_patch4_window7_224/subgraph_1/model.py new file mode 100644 index 00000000..a176377e --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_small_patch4_window7_224/subgraph_1/model.py @@ -0,0 +1,9762 @@ +import paddle + + +class GraphModule(paddle.nn.Layer): + def __init__(self): + super().__init__() + + def forward( + self, + parameter_0, + parameter_1, + parameter_2, + parameter_3, + parameter_4, + parameter_5, + parameter_6, + parameter_7, + parameter_8, + parameter_9, + parameter_10, + parameter_11, + parameter_12, + parameter_13, + parameter_14, + parameter_15, + parameter_16, + parameter_17, + parameter_18, + parameter_19, + parameter_20, + parameter_21, + parameter_22, + parameter_23, + parameter_24, + parameter_25, + parameter_26, + parameter_27, + parameter_28, + parameter_29, + parameter_30, + parameter_31, + parameter_32, + parameter_33, + parameter_34, + parameter_35, + parameter_36, + parameter_37, + parameter_38, + parameter_39, + parameter_40, + parameter_41, + parameter_42, + parameter_43, + parameter_44, + parameter_45, + parameter_46, + parameter_47, + parameter_48, + parameter_49, + parameter_50, + parameter_51, + parameter_52, + parameter_53, + parameter_54, + parameter_55, + parameter_56, + parameter_57, + parameter_58, + parameter_59, + parameter_60, + parameter_61, + parameter_62, + parameter_63, + parameter_64, + parameter_65, + parameter_66, + parameter_67, + parameter_68, + parameter_69, + parameter_70, + parameter_71, + parameter_72, + parameter_73, + parameter_74, + parameter_75, + parameter_76, + parameter_77, + parameter_78, + parameter_79, + parameter_80, + parameter_81, + parameter_82, + parameter_83, + parameter_84, + parameter_85, + parameter_86, + parameter_87, + parameter_88, + parameter_89, + parameter_90, + parameter_91, + parameter_92, + parameter_93, + parameter_94, + parameter_95, + parameter_96, + parameter_97, + parameter_98, + parameter_99, + parameter_100, + parameter_101, + parameter_102, + parameter_103, + parameter_104, + parameter_105, + parameter_106, + parameter_107, + parameter_108, + parameter_109, + parameter_110, + parameter_111, + parameter_112, + parameter_113, + parameter_114, + parameter_115, + parameter_116, + parameter_117, + parameter_118, + parameter_119, + parameter_120, + parameter_121, + parameter_122, + parameter_123, + parameter_124, + parameter_125, + parameter_126, + parameter_127, + parameter_128, + parameter_129, + parameter_130, + parameter_131, + parameter_132, + parameter_133, + parameter_134, + parameter_135, + parameter_136, + parameter_137, + parameter_138, + parameter_139, + parameter_140, + parameter_141, + parameter_142, + parameter_143, + parameter_144, + parameter_145, + parameter_146, + parameter_147, + parameter_148, + parameter_149, + parameter_150, + parameter_151, + parameter_152, + parameter_153, + parameter_154, + parameter_155, + parameter_156, + parameter_157, + parameter_158, + parameter_159, + parameter_160, + parameter_161, + parameter_162, + parameter_163, + parameter_164, + parameter_165, + parameter_166, + parameter_167, + parameter_168, + parameter_169, + parameter_170, + parameter_171, + parameter_172, + parameter_173, + parameter_174, + parameter_175, + parameter_176, + parameter_177, + parameter_178, + parameter_179, + parameter_180, + parameter_181, + parameter_182, + parameter_183, + parameter_184, + parameter_185, + parameter_186, + parameter_187, + parameter_188, + parameter_189, + parameter_190, + parameter_191, + parameter_192, + parameter_193, + parameter_194, + parameter_195, + parameter_196, + parameter_197, + parameter_198, + parameter_199, + parameter_200, + parameter_201, + parameter_202, + parameter_203, + parameter_204, + parameter_205, + parameter_206, + parameter_207, + parameter_208, + parameter_209, + parameter_210, + parameter_211, + parameter_212, + parameter_213, + parameter_214, + parameter_215, + parameter_216, + parameter_217, + parameter_218, + parameter_219, + parameter_220, + parameter_221, + parameter_222, + parameter_223, + parameter_224, + parameter_225, + parameter_226, + parameter_227, + parameter_228, + parameter_229, + parameter_230, + parameter_231, + parameter_232, + parameter_233, + parameter_234, + parameter_235, + parameter_236, + parameter_237, + parameter_238, + parameter_239, + parameter_240, + parameter_241, + parameter_242, + parameter_243, + parameter_244, + parameter_245, + parameter_246, + parameter_247, + parameter_248, + parameter_249, + parameter_250, + parameter_251, + parameter_252, + parameter_253, + parameter_254, + parameter_255, + parameter_256, + parameter_257, + parameter_258, + parameter_259, + parameter_260, + parameter_261, + parameter_262, + parameter_263, + parameter_264, + parameter_265, + parameter_266, + parameter_267, + parameter_268, + parameter_269, + parameter_270, + parameter_271, + parameter_272, + parameter_273, + parameter_274, + parameter_275, + parameter_276, + parameter_277, + parameter_278, + parameter_279, + parameter_280, + parameter_281, + parameter_282, + parameter_283, + parameter_284, + parameter_285, + parameter_286, + parameter_287, + parameter_288, + parameter_289, + parameter_290, + parameter_291, + parameter_292, + parameter_293, + parameter_294, + parameter_295, + parameter_296, + parameter_297, + parameter_298, + parameter_299, + parameter_300, + parameter_301, + parameter_302, + parameter_303, + parameter_304, + data_0, + data_1, + data_2, + data_3, + data_4, + data_5, + data_6, + data_7, + data_8, + data_9, + data_10, + data_11, + data_12, + data_13, + data_14, + data_15, + data_16, + data_17, + data_18, + data_19, + data_20, + data_21, + data_22, + data_23, + data_24, + data_25, + data_26, + data_27, + data_28, + data_29, + data_30, + data_31, + data_32, + data_33, + data_34, + data_35, + data_36, + data_37, + data_38, + data_39, + data_40, + data_41, + data_42, + data_43, + data_44, + data_45, + data_46, + data_47, + data_48, + ): + # pd_op.shape64: (4xi64) <- (-1x3x224x224xf32) + shape64_0 = paddle._C_ops.shape64(data_0) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_0 = [0] + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_1 = [1] + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_0 = paddle._C_ops.slice( + shape64_0, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_0 + + # pd_op.conv2d: (-1x96x56x56xf32) <- (-1x3x224x224xf32, 96x3x4x4xf32) + conv2d_0 = paddle._C_ops.conv2d( + data_0, parameter_304, [4, 4], [0, 0], "EXPLICIT", [1, 1], 1, "NCHW" + ) + del data_0, parameter_304 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_2 = [1, -1, 1, 1] + + # pd_op.reshape: (1x96x1x1xf32) <- (96xf32, 4xi64) + reshape_0 = paddle._C_ops.reshape(parameter_303, full_int_array_2) + del full_int_array_2, parameter_303 + + # pd_op.add: (-1x96x56x56xf32) <- (-1x96x56x56xf32, 1x96x1x1xf32) + add_1 = paddle._C_ops.add(conv2d_0, reshape_0) + del conv2d_0, reshape_0 + + # pd_op.shape64: (4xi64) <- (-1x96x56x56xf32) + shape64_1 = paddle._C_ops.shape64(add_1) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_1 = paddle._C_ops.slice( + shape64_1, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_1 + + # pd_op.flatten: (-1x96x3136xf32) <- (-1x96x56x56xf32) + flatten_0 = paddle._C_ops.flatten(add_1, 2, 3) + del add_1 + + # pd_op.transpose: (-1x3136x96xf32) <- (-1x96x3136xf32) + transpose_0 = paddle._C_ops.transpose(flatten_0, [0, 2, 1]) + del flatten_0 + + # pd_op.layer_norm: (-1x3136x96xf32, -1x3136xf32, -1x3136xf32) <- (-1x3136x96xf32, 96xf32, 96xf32) + layer_norm_0, layer_norm_1, layer_norm_2 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + transpose_0, parameter_302, parameter_301, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_301, parameter_302, transpose_0 + + # pd_op.shape64: (3xi64) <- (-1x3136x96xf32) + shape64_2 = paddle._C_ops.shape64(layer_norm_0) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_2 = paddle._C_ops.slice( + shape64_2, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_2 + + # pd_op.layer_norm: (-1x3136x96xf32, -1x3136xf32, -1x3136xf32) <- (-1x3136x96xf32, 96xf32, 96xf32) + layer_norm_3, layer_norm_4, layer_norm_5 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + layer_norm_0, parameter_300, parameter_299, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_299, parameter_300 + + # pd_op.full: (xi64) <- () + full_0 = paddle._C_ops.full( + [], float("56"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_1 = paddle._C_ops.full( + [], float("96"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_0 = [slice_2, full_0, full_0, full_1] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_0 = paddle._C_ops.stack(combine_0, 0) + del combine_0 + + # pd_op.reshape: (-1x56x56x96xf32) <- (-1x3136x96xf32, 4xi64) + reshape_1 = paddle._C_ops.reshape(layer_norm_3, stack_0) + del layer_norm_3, stack_0 + + # pd_op.shape64: (4xi64) <- (-1x56x56x96xf32) + shape64_3 = paddle._C_ops.shape64(reshape_1) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_3 = paddle._C_ops.slice( + shape64_3, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_3 + + # pd_op.full: (xi64) <- () + full_2 = paddle._C_ops.full( + [], float("8"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_3 = paddle._C_ops.full( + [], float("7"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_1 = [slice_3, full_2, full_3, full_2, full_3, full_1] + del slice_3 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_1 = paddle._C_ops.stack(combine_1, 0) + del combine_1 + + # pd_op.reshape: (-1x8x7x8x7x96xf32) <- (-1x56x56x96xf32, 6xi64) + reshape_2 = paddle._C_ops.reshape(reshape_1, stack_1) + del reshape_1, stack_1 + + # pd_op.transpose: (-1x8x8x7x7x96xf32) <- (-1x8x7x8x7x96xf32) + transpose_1 = paddle._C_ops.transpose(reshape_2, [0, 1, 3, 2, 4, 5]) + del reshape_2 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_3 = [-1, 7, 7, 96] + + # pd_op.reshape: (-1x7x7x96xf32) <- (-1x8x8x7x7x96xf32, 4xi64) + reshape_3 = paddle._C_ops.reshape(transpose_1, full_int_array_3) + del transpose_1 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_4 = [-1, 49, 96] + + # pd_op.reshape: (-1x49x96xf32) <- (-1x7x7x96xf32, 3xi64) + reshape_4 = paddle._C_ops.reshape(reshape_3, full_int_array_4) + del reshape_3 + + # pd_op.shape64: (3xi64) <- (-1x49x96xf32) + shape64_4 = paddle._C_ops.shape64(reshape_4) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_4 = paddle._C_ops.slice( + shape64_4, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_4 + + # pd_op.matmul: (-1x49x288xf32) <- (-1x49x96xf32, 96x288xf32) + matmul_0 = paddle._C_ops.matmul(reshape_4, parameter_298, False, False) + del parameter_298, reshape_4 + + # pd_op.add: (-1x49x288xf32) <- (-1x49x288xf32, 288xf32) + add_2 = paddle._C_ops.add(matmul_0, parameter_297) + del matmul_0, parameter_297 + + # pd_op.full: (xi64) <- () + full_4 = paddle._C_ops.full( + [], float("49"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_5 = paddle._C_ops.full( + [], float("3"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_6 = paddle._C_ops.full( + [], float("32"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_2 = [slice_4, full_4, full_5, full_5, full_6] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_2 = paddle._C_ops.stack(combine_2, 0) + del combine_2 + + # pd_op.reshape: (-1x49x3x3x32xf32) <- (-1x49x288xf32, 5xi64) + reshape_5 = paddle._C_ops.reshape(add_2, stack_2) + del add_2, stack_2 + + # pd_op.transpose: (3x-1x3x49x32xf32) <- (-1x49x3x3x32xf32) + transpose_2 = paddle._C_ops.transpose(reshape_5, [2, 0, 3, 1, 4]) + del reshape_5 + + # pd_op.slice: (-1x3x49x32xf32) <- (3x-1x3x49x32xf32, 1xi64, 1xi64) + slice_5 = paddle._C_ops.slice( + transpose_2, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_5 = [2] + + # pd_op.slice: (-1x3x49x32xf32) <- (3x-1x3x49x32xf32, 1xi64, 1xi64) + slice_6 = paddle._C_ops.slice( + transpose_2, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_6 = [3] + + # pd_op.slice: (-1x3x49x32xf32) <- (3x-1x3x49x32xf32, 1xi64, 1xi64) + slice_7 = paddle._C_ops.slice( + transpose_2, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_2 + + # pd_op.full: (1xf32) <- () + full_7 = paddle._C_ops.full( + [1], float("0.176777"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (-1x3x49x32xf32) <- (-1x3x49x32xf32, 1xf32) + scale_0 = paddle._C_ops.scale(slice_5, full_7, float("0"), True) + del slice_5 + + # pd_op.transpose: (-1x3x32x49xf32) <- (-1x3x49x32xf32) + transpose_3 = paddle._C_ops.transpose(slice_6, [0, 1, 3, 2]) + del slice_6 + + # pd_op.matmul: (-1x3x49x49xf32) <- (-1x3x49x32xf32, -1x3x32x49xf32) + matmul_1 = paddle._C_ops.matmul(scale_0, transpose_3, False, False) + del scale_0, transpose_3 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_7 = [-1] + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_6 = paddle._C_ops.reshape(data_1, full_int_array_7) + del data_1 + + # pd_op.index_select: (2401x3xf32) <- (169x3xf32, 2401xi64) + index_select_0 = paddle._C_ops.index_select(data_2, reshape_6, 0) + del data_2, reshape_6 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_8 = [49, 49, -1] + + # pd_op.reshape: (49x49x3xf32) <- (2401x3xf32, 3xi64) + reshape_7 = paddle._C_ops.reshape(index_select_0, full_int_array_8) + del index_select_0 + + # pd_op.transpose: (3x49x49xf32) <- (49x49x3xf32) + transpose_4 = paddle._C_ops.transpose(reshape_7, [2, 0, 1]) + del reshape_7 + + # pd_op.unsqueeze: (1x3x49x49xf32) <- (3x49x49xf32, 1xi64) + unsqueeze_0 = paddle._C_ops.unsqueeze(transpose_4, full_int_array_0) + del transpose_4 + + # pd_op.add: (-1x3x49x49xf32) <- (-1x3x49x49xf32, 1x3x49x49xf32) + add_3 = paddle._C_ops.add(matmul_1, unsqueeze_0) + del matmul_1, unsqueeze_0 + + # pd_op.softmax: (-1x3x49x49xf32) <- (-1x3x49x49xf32) + softmax_0 = paddle._C_ops.softmax(add_3, -1) + del add_3 + + # pd_op.matmul: (-1x3x49x32xf32) <- (-1x3x49x49xf32, -1x3x49x32xf32) + matmul_2 = paddle._C_ops.matmul(softmax_0, slice_7, False, False) + del slice_7, softmax_0 + + # pd_op.transpose: (-1x49x3x32xf32) <- (-1x3x49x32xf32) + transpose_5 = paddle._C_ops.transpose(matmul_2, [0, 2, 1, 3]) + del matmul_2 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_3 = [slice_4, full_4, full_1] + del slice_4 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_3 = paddle._C_ops.stack(combine_3, 0) + del combine_3 + + # pd_op.reshape: (-1x49x96xf32) <- (-1x49x3x32xf32, 3xi64) + reshape_8 = paddle._C_ops.reshape(transpose_5, stack_3) + del stack_3, transpose_5 + + # pd_op.matmul: (-1x49x96xf32) <- (-1x49x96xf32, 96x96xf32) + matmul_3 = paddle._C_ops.matmul(reshape_8, parameter_296, False, False) + del parameter_296, reshape_8 + + # pd_op.add: (-1x49x96xf32) <- (-1x49x96xf32, 96xf32) + add_4 = paddle._C_ops.add(matmul_3, parameter_295) + del matmul_3, parameter_295 + + # pd_op.reshape: (-1x7x7x96xf32) <- (-1x49x96xf32, 4xi64) + reshape_9 = paddle._C_ops.reshape(add_4, full_int_array_3) + del add_4 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_9 = [-1, 8, 8, 7, 7, 96] + + # pd_op.reshape: (-1x8x8x7x7x96xf32) <- (-1x7x7x96xf32, 6xi64) + reshape_10 = paddle._C_ops.reshape(reshape_9, full_int_array_9) + del reshape_9 + + # pd_op.transpose: (-1x8x7x8x7x96xf32) <- (-1x8x8x7x7x96xf32) + transpose_6 = paddle._C_ops.transpose(reshape_10, [0, 1, 3, 2, 4, 5]) + del reshape_10 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_10 = [-1, 56, 56, 96] + + # pd_op.reshape: (-1x56x56x96xf32) <- (-1x8x7x8x7x96xf32, 4xi64) + reshape_11 = paddle._C_ops.reshape(transpose_6, full_int_array_10) + del transpose_6 + + # pd_op.full: (xi64) <- () + full_8 = paddle._C_ops.full( + [], float("3136"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_4 = [slice_2, full_8, full_1] + del slice_2 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_4 = paddle._C_ops.stack(combine_4, 0) + del combine_4 + + # pd_op.reshape: (-1x3136x96xf32) <- (-1x56x56x96xf32, 3xi64) + reshape_12 = paddle._C_ops.reshape(reshape_11, stack_4) + del reshape_11, stack_4 + + # pd_op.add: (-1x3136x96xf32) <- (-1x3136x96xf32, -1x3136x96xf32) + add_5 = paddle._C_ops.add(layer_norm_0, reshape_12) + del layer_norm_0, reshape_12 + + # pd_op.layer_norm: (-1x3136x96xf32, -1x3136xf32, -1x3136xf32) <- (-1x3136x96xf32, 96xf32, 96xf32) + layer_norm_6, layer_norm_7, layer_norm_8 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_5, parameter_294, parameter_293, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_293, parameter_294 + + # pd_op.matmul: (-1x3136x384xf32) <- (-1x3136x96xf32, 96x384xf32) + matmul_4 = paddle._C_ops.matmul(layer_norm_6, parameter_292, False, False) + del layer_norm_6, parameter_292 + + # pd_op.add: (-1x3136x384xf32) <- (-1x3136x384xf32, 384xf32) + add_6 = paddle._C_ops.add(matmul_4, parameter_291) + del matmul_4, parameter_291 + + # pd_op.gelu: (-1x3136x384xf32) <- (-1x3136x384xf32) + gelu_0 = paddle._C_ops.gelu(add_6, False) + del add_6 + + # pd_op.matmul: (-1x3136x96xf32) <- (-1x3136x384xf32, 384x96xf32) + matmul_5 = paddle._C_ops.matmul(gelu_0, parameter_290, False, False) + del gelu_0, parameter_290 + + # pd_op.add: (-1x3136x96xf32) <- (-1x3136x96xf32, 96xf32) + add_7 = paddle._C_ops.add(matmul_5, parameter_289) + del matmul_5, parameter_289 + + # pd_op.add: (-1x3136x96xf32) <- (-1x3136x96xf32, -1x3136x96xf32) + add_8 = paddle._C_ops.add(add_5, add_7) + del add_5, add_7 + + # pd_op.shape64: (3xi64) <- (-1x3136x96xf32) + shape64_5 = paddle._C_ops.shape64(add_8) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_8 = paddle._C_ops.slice( + shape64_5, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_5 + + # pd_op.layer_norm: (-1x3136x96xf32, -1x3136xf32, -1x3136xf32) <- (-1x3136x96xf32, 96xf32, 96xf32) + layer_norm_9, layer_norm_10, layer_norm_11 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_8, parameter_288, parameter_287, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_287, parameter_288 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_5 = [slice_8, full_0, full_0, full_1] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_5 = paddle._C_ops.stack(combine_5, 0) + del combine_5 + + # pd_op.reshape: (-1x56x56x96xf32) <- (-1x3136x96xf32, 4xi64) + reshape_13 = paddle._C_ops.reshape(layer_norm_9, stack_5) + del layer_norm_9, stack_5 + + # pd_op.shape64: (4xi64) <- (-1x56x56x96xf32) + shape64_6 = paddle._C_ops.shape64(reshape_13) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_9 = paddle._C_ops.slice( + shape64_6, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_6 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_11 = [-3, -3] + + # pd_op.roll: (-1x56x56x96xf32) <- (-1x56x56x96xf32, 2xi64) + roll_0 = paddle._C_ops.roll(reshape_13, full_int_array_11, [1, 2]) + del reshape_13 + + # pd_op.shape64: (4xi64) <- (-1x56x56x96xf32) + shape64_7 = paddle._C_ops.shape64(roll_0) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_10 = paddle._C_ops.slice( + shape64_7, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_7 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_6 = [slice_10, full_2, full_3, full_2, full_3, full_1] + del full_2, slice_10 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_6 = paddle._C_ops.stack(combine_6, 0) + del combine_6 + + # pd_op.reshape: (-1x8x7x8x7x96xf32) <- (-1x56x56x96xf32, 6xi64) + reshape_14 = paddle._C_ops.reshape(roll_0, stack_6) + del roll_0, stack_6 + + # pd_op.transpose: (-1x8x8x7x7x96xf32) <- (-1x8x7x8x7x96xf32) + transpose_7 = paddle._C_ops.transpose(reshape_14, [0, 1, 3, 2, 4, 5]) + del reshape_14 + + # pd_op.reshape: (-1x7x7x96xf32) <- (-1x8x8x7x7x96xf32, 4xi64) + reshape_15 = paddle._C_ops.reshape(transpose_7, full_int_array_3) + del transpose_7 + + # pd_op.reshape: (-1x49x96xf32) <- (-1x7x7x96xf32, 3xi64) + reshape_16 = paddle._C_ops.reshape(reshape_15, full_int_array_4) + del full_int_array_4, reshape_15 + + # pd_op.full: (1x56x56x1xf32) <- () + full_9 = paddle._C_ops.full( + [1, 56, 56, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_12 = [0, 0] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_13 = [-7, -7] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_14 = [1, 1] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__12 = paddle._C_ops.set_value_( + full_9, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_9 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_15 = [0, -7] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_16 = [-7, -3] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__13 = paddle._C_ops.set_value_( + set_value__12, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__12 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_17 = [0, -3] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_18 = [-7, 2147483647] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__14 = paddle._C_ops.set_value_( + set_value__13, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__13 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_19 = [-7, 0] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_20 = [-3, -7] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__15 = paddle._C_ops.set_value_( + set_value__14, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__14 + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__16 = paddle._C_ops.set_value_( + set_value__15, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__15 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_21 = [-3, 2147483647] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__17 = paddle._C_ops.set_value_( + set_value__16, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__16 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_22 = [-3, 0] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_23 = [2147483647, -7] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__18 = paddle._C_ops.set_value_( + set_value__17, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__17 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_24 = [2147483647, -3] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__19 = paddle._C_ops.set_value_( + set_value__18, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__18 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_25 = [2147483647, 2147483647] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__0 = paddle._C_ops.set_value_( + set_value__19, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__19 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_26 = [1, 8, 7, 8, 7, 1] + + # pd_op.reshape: (1x8x7x8x7x1xf32) <- (1x56x56x1xf32, 6xi64) + reshape_17 = paddle._C_ops.reshape(set_value__0, full_int_array_26) + del full_int_array_26 + + # pd_op.transpose: (1x8x8x7x7x1xf32) <- (1x8x7x8x7x1xf32) + transpose_8 = paddle._C_ops.transpose(reshape_17, [0, 1, 3, 2, 4, 5]) + del reshape_17 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_27 = [-1, 7, 7, 1] + + # pd_op.reshape: (64x7x7x1xf32) <- (1x8x8x7x7x1xf32, 4xi64) + reshape_18 = paddle._C_ops.reshape(transpose_8, full_int_array_27) + del transpose_8 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_28 = [-1, 49] + + # pd_op.reshape: (64x49xf32) <- (64x7x7x1xf32, 2xi64) + reshape_19 = paddle._C_ops.reshape(reshape_18, full_int_array_28) + del reshape_18 + + # pd_op.unsqueeze: (64x1x49xf32) <- (64x49xf32, 1xi64) + unsqueeze_1 = paddle._C_ops.unsqueeze(reshape_19, full_int_array_1) + + # pd_op.unsqueeze: (64x49x1xf32) <- (64x49xf32, 1xi64) + unsqueeze_2 = paddle._C_ops.unsqueeze(reshape_19, full_int_array_5) + del reshape_19 + + # pd_op.subtract: (64x49x49xf32) <- (64x1x49xf32, 64x49x1xf32) + subtract_0 = paddle._C_ops.subtract(unsqueeze_1, unsqueeze_2) + del unsqueeze_1, unsqueeze_2 + + # pd_op.full: (xf32) <- () + full_10 = paddle._C_ops.full( + [], float("0"), paddle.float32, paddle.framework._current_expected_place() + ) + + # pd_op.not_equal: (64x49x49xb) <- (64x49x49xf32, xf32) + not_equal_0 = paddle._C_ops.not_equal(subtract_0, full_10) + + # pd_op.full: (64x49x49xf32) <- () + full_11 = paddle._C_ops.full( + [64, 49, 49], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (64x49x49xf32) <- (64x49x49xb, 64x49x49xf32, 64x49x49xf32) + where_0 = paddle._C_ops.where(not_equal_0, full_11, subtract_0) + del full_11, not_equal_0, subtract_0 + + # pd_op.equal: (64x49x49xb) <- (64x49x49xf32, xf32) + equal_0 = paddle._C_ops.equal(where_0, full_10) + + # pd_op.full: (64x49x49xf32) <- () + full_12 = paddle._C_ops.full( + [64, 49, 49], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (64x49x49xf32) <- (64x49x49xb, 64x49x49xf32, 64x49x49xf32) + where_1 = paddle._C_ops.where(equal_0, full_12, where_0) + del equal_0, full_12, where_0 + + # pd_op.shape64: (3xi64) <- (-1x49x96xf32) + shape64_8 = paddle._C_ops.shape64(reshape_16) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_11 = paddle._C_ops.slice( + shape64_8, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_8 + + # pd_op.matmul: (-1x49x288xf32) <- (-1x49x96xf32, 96x288xf32) + matmul_6 = paddle._C_ops.matmul(reshape_16, parameter_286, False, False) + del parameter_286, reshape_16 + + # pd_op.add: (-1x49x288xf32) <- (-1x49x288xf32, 288xf32) + add_9 = paddle._C_ops.add(matmul_6, parameter_285) + del matmul_6, parameter_285 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_7 = [slice_11, full_4, full_5, full_5, full_6] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_7 = paddle._C_ops.stack(combine_7, 0) + del combine_7 + + # pd_op.reshape: (-1x49x3x3x32xf32) <- (-1x49x288xf32, 5xi64) + reshape_20 = paddle._C_ops.reshape(add_9, stack_7) + del add_9, stack_7 + + # pd_op.transpose: (3x-1x3x49x32xf32) <- (-1x49x3x3x32xf32) + transpose_9 = paddle._C_ops.transpose(reshape_20, [2, 0, 3, 1, 4]) + del reshape_20 + + # pd_op.slice: (-1x3x49x32xf32) <- (3x-1x3x49x32xf32, 1xi64, 1xi64) + slice_12 = paddle._C_ops.slice( + transpose_9, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x3x49x32xf32) <- (3x-1x3x49x32xf32, 1xi64, 1xi64) + slice_13 = paddle._C_ops.slice( + transpose_9, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x3x49x32xf32) <- (3x-1x3x49x32xf32, 1xi64, 1xi64) + slice_14 = paddle._C_ops.slice( + transpose_9, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_9 + + # pd_op.scale: (-1x3x49x32xf32) <- (-1x3x49x32xf32, 1xf32) + scale_1 = paddle._C_ops.scale(slice_12, full_7, float("0"), True) + del slice_12 + + # pd_op.transpose: (-1x3x32x49xf32) <- (-1x3x49x32xf32) + transpose_10 = paddle._C_ops.transpose(slice_13, [0, 1, 3, 2]) + del slice_13 + + # pd_op.matmul: (-1x3x49x49xf32) <- (-1x3x49x32xf32, -1x3x32x49xf32) + matmul_7 = paddle._C_ops.matmul(scale_1, transpose_10, False, False) + del scale_1, transpose_10 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_21 = paddle._C_ops.reshape(data_3, full_int_array_7) + del data_3 + + # pd_op.index_select: (2401x3xf32) <- (169x3xf32, 2401xi64) + index_select_1 = paddle._C_ops.index_select(data_4, reshape_21, 0) + del data_4, reshape_21 + + # pd_op.reshape: (49x49x3xf32) <- (2401x3xf32, 3xi64) + reshape_22 = paddle._C_ops.reshape(index_select_1, full_int_array_8) + del index_select_1 + + # pd_op.transpose: (3x49x49xf32) <- (49x49x3xf32) + transpose_11 = paddle._C_ops.transpose(reshape_22, [2, 0, 1]) + del reshape_22 + + # pd_op.unsqueeze: (1x3x49x49xf32) <- (3x49x49xf32, 1xi64) + unsqueeze_3 = paddle._C_ops.unsqueeze(transpose_11, full_int_array_0) + del transpose_11 + + # pd_op.add: (-1x3x49x49xf32) <- (-1x3x49x49xf32, 1x3x49x49xf32) + add_10 = paddle._C_ops.add(matmul_7, unsqueeze_3) + del matmul_7, unsqueeze_3 + + # pd_op.full: (xi64) <- () + full_13 = paddle._C_ops.full( + [], float("64"), paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_0 = paddle._C_ops.floor_divide(slice_11, full_13) + del full_13 + + # pd_op.full: (xi64) <- () + full_14 = paddle._C_ops.full( + [], float("64"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_8 = [floor_divide_0, full_14, full_5, full_4, full_4] + del floor_divide_0, full_14 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_8 = paddle._C_ops.stack(combine_8, 0) + del combine_8 + + # pd_op.reshape: (-1x64x3x49x49xf32) <- (-1x3x49x49xf32, 5xi64) + reshape_23 = paddle._C_ops.reshape(add_10, stack_8) + del add_10, stack_8 + + # pd_op.unsqueeze: (64x1x49x49xf32) <- (64x49x49xf32, 1xi64) + unsqueeze_4 = paddle._C_ops.unsqueeze(where_1, full_int_array_1) + del where_1 + + # pd_op.unsqueeze: (1x64x1x49x49xf32) <- (64x1x49x49xf32, 1xi64) + unsqueeze_5 = paddle._C_ops.unsqueeze(unsqueeze_4, full_int_array_0) + del unsqueeze_4 + + # pd_op.add: (-1x64x3x49x49xf32) <- (-1x64x3x49x49xf32, 1x64x1x49x49xf32) + add_11 = paddle._C_ops.add(reshape_23, unsqueeze_5) + del reshape_23, unsqueeze_5 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_9 = [slice_11, full_5, full_4, full_4] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_9 = paddle._C_ops.stack(combine_9, 0) + del combine_9 + + # pd_op.reshape: (-1x3x49x49xf32) <- (-1x64x3x49x49xf32, 4xi64) + reshape_24 = paddle._C_ops.reshape(add_11, stack_9) + del add_11, stack_9 + + # pd_op.softmax: (-1x3x49x49xf32) <- (-1x3x49x49xf32) + softmax_1 = paddle._C_ops.softmax(reshape_24, -1) + del reshape_24 + + # pd_op.matmul: (-1x3x49x32xf32) <- (-1x3x49x49xf32, -1x3x49x32xf32) + matmul_8 = paddle._C_ops.matmul(softmax_1, slice_14, False, False) + del slice_14, softmax_1 + + # pd_op.transpose: (-1x49x3x32xf32) <- (-1x3x49x32xf32) + transpose_12 = paddle._C_ops.transpose(matmul_8, [0, 2, 1, 3]) + del matmul_8 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_10 = [slice_11, full_4, full_1] + del slice_11 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_10 = paddle._C_ops.stack(combine_10, 0) + del combine_10 + + # pd_op.reshape: (-1x49x96xf32) <- (-1x49x3x32xf32, 3xi64) + reshape_25 = paddle._C_ops.reshape(transpose_12, stack_10) + del stack_10, transpose_12 + + # pd_op.matmul: (-1x49x96xf32) <- (-1x49x96xf32, 96x96xf32) + matmul_9 = paddle._C_ops.matmul(reshape_25, parameter_284, False, False) + del parameter_284, reshape_25 + + # pd_op.add: (-1x49x96xf32) <- (-1x49x96xf32, 96xf32) + add_12 = paddle._C_ops.add(matmul_9, parameter_283) + del matmul_9, parameter_283 + + # pd_op.reshape: (-1x7x7x96xf32) <- (-1x49x96xf32, 4xi64) + reshape_26 = paddle._C_ops.reshape(add_12, full_int_array_3) + del add_12, full_int_array_3 + + # pd_op.reshape: (-1x8x8x7x7x96xf32) <- (-1x7x7x96xf32, 6xi64) + reshape_27 = paddle._C_ops.reshape(reshape_26, full_int_array_9) + del full_int_array_9, reshape_26 + + # pd_op.transpose: (-1x8x7x8x7x96xf32) <- (-1x8x8x7x7x96xf32) + transpose_13 = paddle._C_ops.transpose(reshape_27, [0, 1, 3, 2, 4, 5]) + del reshape_27 + + # pd_op.reshape: (-1x56x56x96xf32) <- (-1x8x7x8x7x96xf32, 4xi64) + reshape_28 = paddle._C_ops.reshape(transpose_13, full_int_array_10) + del full_int_array_10, transpose_13 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_29 = [3, 3] + + # pd_op.roll: (-1x56x56x96xf32) <- (-1x56x56x96xf32, 2xi64) + roll_1 = paddle._C_ops.roll(reshape_28, full_int_array_29, [1, 2]) + del reshape_28 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_11 = [slice_8, full_8, full_1] + del full_8, slice_8 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_11 = paddle._C_ops.stack(combine_11, 0) + del combine_11 + + # pd_op.reshape: (-1x3136x96xf32) <- (-1x56x56x96xf32, 3xi64) + reshape_29 = paddle._C_ops.reshape(roll_1, stack_11) + del roll_1, stack_11 + + # pd_op.add: (-1x3136x96xf32) <- (-1x3136x96xf32, -1x3136x96xf32) + add_13 = paddle._C_ops.add(add_8, reshape_29) + del add_8, reshape_29 + + # pd_op.layer_norm: (-1x3136x96xf32, -1x3136xf32, -1x3136xf32) <- (-1x3136x96xf32, 96xf32, 96xf32) + layer_norm_12, layer_norm_13, layer_norm_14 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_13, parameter_282, parameter_281, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_281, parameter_282 + + # pd_op.matmul: (-1x3136x384xf32) <- (-1x3136x96xf32, 96x384xf32) + matmul_10 = paddle._C_ops.matmul(layer_norm_12, parameter_280, False, False) + del layer_norm_12, parameter_280 + + # pd_op.add: (-1x3136x384xf32) <- (-1x3136x384xf32, 384xf32) + add_14 = paddle._C_ops.add(matmul_10, parameter_279) + del matmul_10, parameter_279 + + # pd_op.gelu: (-1x3136x384xf32) <- (-1x3136x384xf32) + gelu_1 = paddle._C_ops.gelu(add_14, False) + del add_14 + + # pd_op.matmul: (-1x3136x96xf32) <- (-1x3136x384xf32, 384x96xf32) + matmul_11 = paddle._C_ops.matmul(gelu_1, parameter_278, False, False) + del gelu_1, parameter_278 + + # pd_op.add: (-1x3136x96xf32) <- (-1x3136x96xf32, 96xf32) + add_15 = paddle._C_ops.add(matmul_11, parameter_277) + del matmul_11, parameter_277 + + # pd_op.add: (-1x3136x96xf32) <- (-1x3136x96xf32, -1x3136x96xf32) + add_16 = paddle._C_ops.add(add_13, add_15) + del add_13, add_15 + + # pd_op.shape64: (3xi64) <- (-1x3136x96xf32) + shape64_9 = paddle._C_ops.shape64(add_16) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_15 = paddle._C_ops.slice( + shape64_9, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_9 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_12 = [slice_15, full_0, full_0, full_1] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_12 = paddle._C_ops.stack(combine_12, 0) + del combine_12 + + # pd_op.reshape: (-1x56x56x96xf32) <- (-1x3136x96xf32, 4xi64) + reshape_30 = paddle._C_ops.reshape(add_16, stack_12) + del add_16, stack_12 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_30 = [2, 2] + + # pd_op.strided_slice: (-1x28x28x96xf32) <- (-1x56x56x96xf32, 2xi64, 2xi64, 2xi64) + strided_slice_0 = paddle._C_ops.strided_slice( + reshape_30, [1, 2], full_int_array_12, full_int_array_25, full_int_array_30 + ) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_31 = [1, 0] + + # pd_op.strided_slice: (-1x28x28x96xf32) <- (-1x56x56x96xf32, 2xi64, 2xi64, 2xi64) + strided_slice_1 = paddle._C_ops.strided_slice( + reshape_30, [1, 2], full_int_array_31, full_int_array_25, full_int_array_30 + ) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_32 = [0, 1] + + # pd_op.strided_slice: (-1x28x28x96xf32) <- (-1x56x56x96xf32, 2xi64, 2xi64, 2xi64) + strided_slice_2 = paddle._C_ops.strided_slice( + reshape_30, [1, 2], full_int_array_32, full_int_array_25, full_int_array_30 + ) + + # pd_op.strided_slice: (-1x28x28x96xf32) <- (-1x56x56x96xf32, 2xi64, 2xi64, 2xi64) + strided_slice_3 = paddle._C_ops.strided_slice( + reshape_30, [1, 2], full_int_array_14, full_int_array_25, full_int_array_30 + ) + + # pd_op.shape64: (4xi64) <- (-1x56x56x96xf32) + shape64_10 = paddle._C_ops.shape64(reshape_30) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_16 = paddle._C_ops.slice( + shape64_10, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_10 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_13 = [slice_16, full_0, full_0, full_1] + del full_0, full_1, slice_16 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_13 = paddle._C_ops.stack(combine_13, 0) + del combine_13 + + # pd_op.reshape: (-1x56x56x96xf32) <- (-1x56x56x96xf32, 4xi64) + reshape_31 = paddle._C_ops.reshape(reshape_30, stack_13) + del reshape_30, stack_13 + + # pd_op.full: (1xi32) <- () + full_15 = paddle._C_ops.full( + [1], float("-1"), paddle.int32, paddle.core.CPUPlace() + ) + + # builtin.combine: ([-1x28x28x96xf32, -1x28x28x96xf32, -1x28x28x96xf32, -1x28x28x96xf32]) <- (-1x28x28x96xf32, -1x28x28x96xf32, -1x28x28x96xf32, -1x28x28x96xf32) + combine_14 = [ + strided_slice_0, + strided_slice_1, + strided_slice_2, + strided_slice_3, + ] + del strided_slice_0, strided_slice_1, strided_slice_2, strided_slice_3 + + # pd_op.concat: (-1x28x28x384xf32) <- ([-1x28x28x96xf32, -1x28x28x96xf32, -1x28x28x96xf32, -1x28x28x96xf32], 1xi32) + concat_0 = paddle._C_ops.concat(combine_14, full_15) + del combine_14 + + # pd_op.full: (xi64) <- () + full_16 = paddle._C_ops.full( + [], float("-1"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_17 = paddle._C_ops.full( + [], float("384"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_15 = [slice_15, full_16, full_17] + del slice_15 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_14 = paddle._C_ops.stack(combine_15, 0) + del combine_15 + + # pd_op.reshape: (-1x-1x384xf32) <- (-1x28x28x384xf32, 3xi64) + reshape_32 = paddle._C_ops.reshape(concat_0, stack_14) + del concat_0, stack_14 + + # pd_op.layer_norm: (-1x-1x384xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x384xf32, 384xf32, 384xf32) + layer_norm_15, layer_norm_16, layer_norm_17 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + reshape_32, parameter_276, parameter_275, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_275, parameter_276, reshape_32 + + # pd_op.matmul: (-1x-1x192xf32) <- (-1x-1x384xf32, 384x192xf32) + matmul_12 = paddle._C_ops.matmul(layer_norm_15, parameter_274, False, False) + del layer_norm_15, parameter_274 + + # pd_op.shape64: (3xi64) <- (-1x-1x192xf32) + shape64_11 = paddle._C_ops.shape64(matmul_12) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_17 = paddle._C_ops.slice( + shape64_11, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_11 + + # pd_op.shape64: (3xi64) <- (-1x-1x192xf32) + shape64_12 = paddle._C_ops.shape64(matmul_12) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_18 = paddle._C_ops.slice( + shape64_12, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + del shape64_12 + + # pd_op.layer_norm: (-1x-1x192xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x192xf32, 192xf32, 192xf32) + layer_norm_18, layer_norm_19, layer_norm_20 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + matmul_12, parameter_273, parameter_272, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_272, parameter_273 + + # pd_op.full: (xi64) <- () + full_18 = paddle._C_ops.full( + [], float("28"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_19 = paddle._C_ops.full( + [], float("192"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_16 = [slice_17, full_18, full_18, full_19] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_15 = paddle._C_ops.stack(combine_16, 0) + del combine_16 + + # pd_op.reshape: (-1x28x28x192xf32) <- (-1x-1x192xf32, 4xi64) + reshape_33 = paddle._C_ops.reshape(layer_norm_18, stack_15) + del layer_norm_18, stack_15 + + # pd_op.shape64: (4xi64) <- (-1x28x28x192xf32) + shape64_13 = paddle._C_ops.shape64(reshape_33) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_19 = paddle._C_ops.slice( + shape64_13, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_13 + + # pd_op.full: (xi64) <- () + full_20 = paddle._C_ops.full( + [], float("4"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_17 = [slice_19, full_20, full_3, full_20, full_3, full_19] + del slice_19 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_16 = paddle._C_ops.stack(combine_17, 0) + del combine_17 + + # pd_op.reshape: (-1x4x7x4x7x192xf32) <- (-1x28x28x192xf32, 6xi64) + reshape_34 = paddle._C_ops.reshape(reshape_33, stack_16) + del reshape_33, stack_16 + + # pd_op.transpose: (-1x4x4x7x7x192xf32) <- (-1x4x7x4x7x192xf32) + transpose_14 = paddle._C_ops.transpose(reshape_34, [0, 1, 3, 2, 4, 5]) + del reshape_34 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_33 = [-1, 7, 7, 192] + + # pd_op.reshape: (-1x7x7x192xf32) <- (-1x4x4x7x7x192xf32, 4xi64) + reshape_35 = paddle._C_ops.reshape(transpose_14, full_int_array_33) + del transpose_14 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_34 = [-1, 49, 192] + + # pd_op.reshape: (-1x49x192xf32) <- (-1x7x7x192xf32, 3xi64) + reshape_36 = paddle._C_ops.reshape(reshape_35, full_int_array_34) + del reshape_35 + + # pd_op.shape64: (3xi64) <- (-1x49x192xf32) + shape64_14 = paddle._C_ops.shape64(reshape_36) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_20 = paddle._C_ops.slice( + shape64_14, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_14 + + # pd_op.matmul: (-1x49x576xf32) <- (-1x49x192xf32, 192x576xf32) + matmul_13 = paddle._C_ops.matmul(reshape_36, parameter_271, False, False) + del parameter_271, reshape_36 + + # pd_op.add: (-1x49x576xf32) <- (-1x49x576xf32, 576xf32) + add_17 = paddle._C_ops.add(matmul_13, parameter_270) + del matmul_13, parameter_270 + + # pd_op.full: (xi64) <- () + full_21 = paddle._C_ops.full( + [], float("6"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_18 = [slice_20, full_4, full_5, full_21, full_6] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_17 = paddle._C_ops.stack(combine_18, 0) + del combine_18 + + # pd_op.reshape: (-1x49x3x6x32xf32) <- (-1x49x576xf32, 5xi64) + reshape_37 = paddle._C_ops.reshape(add_17, stack_17) + del add_17, stack_17 + + # pd_op.transpose: (3x-1x6x49x32xf32) <- (-1x49x3x6x32xf32) + transpose_15 = paddle._C_ops.transpose(reshape_37, [2, 0, 3, 1, 4]) + del reshape_37 + + # pd_op.slice: (-1x6x49x32xf32) <- (3x-1x6x49x32xf32, 1xi64, 1xi64) + slice_21 = paddle._C_ops.slice( + transpose_15, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x6x49x32xf32) <- (3x-1x6x49x32xf32, 1xi64, 1xi64) + slice_22 = paddle._C_ops.slice( + transpose_15, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x6x49x32xf32) <- (3x-1x6x49x32xf32, 1xi64, 1xi64) + slice_23 = paddle._C_ops.slice( + transpose_15, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_15 + + # pd_op.scale: (-1x6x49x32xf32) <- (-1x6x49x32xf32, 1xf32) + scale_2 = paddle._C_ops.scale(slice_21, full_7, float("0"), True) + del slice_21 + + # pd_op.transpose: (-1x6x32x49xf32) <- (-1x6x49x32xf32) + transpose_16 = paddle._C_ops.transpose(slice_22, [0, 1, 3, 2]) + del slice_22 + + # pd_op.matmul: (-1x6x49x49xf32) <- (-1x6x49x32xf32, -1x6x32x49xf32) + matmul_14 = paddle._C_ops.matmul(scale_2, transpose_16, False, False) + del scale_2, transpose_16 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_38 = paddle._C_ops.reshape(data_5, full_int_array_7) + del data_5 + + # pd_op.index_select: (2401x6xf32) <- (169x6xf32, 2401xi64) + index_select_2 = paddle._C_ops.index_select(data_6, reshape_38, 0) + del data_6, reshape_38 + + # pd_op.reshape: (49x49x6xf32) <- (2401x6xf32, 3xi64) + reshape_39 = paddle._C_ops.reshape(index_select_2, full_int_array_8) + del index_select_2 + + # pd_op.transpose: (6x49x49xf32) <- (49x49x6xf32) + transpose_17 = paddle._C_ops.transpose(reshape_39, [2, 0, 1]) + del reshape_39 + + # pd_op.unsqueeze: (1x6x49x49xf32) <- (6x49x49xf32, 1xi64) + unsqueeze_6 = paddle._C_ops.unsqueeze(transpose_17, full_int_array_0) + del transpose_17 + + # pd_op.add: (-1x6x49x49xf32) <- (-1x6x49x49xf32, 1x6x49x49xf32) + add_18 = paddle._C_ops.add(matmul_14, unsqueeze_6) + del matmul_14, unsqueeze_6 + + # pd_op.softmax: (-1x6x49x49xf32) <- (-1x6x49x49xf32) + softmax_2 = paddle._C_ops.softmax(add_18, -1) + del add_18 + + # pd_op.matmul: (-1x6x49x32xf32) <- (-1x6x49x49xf32, -1x6x49x32xf32) + matmul_15 = paddle._C_ops.matmul(softmax_2, slice_23, False, False) + del slice_23, softmax_2 + + # pd_op.transpose: (-1x49x6x32xf32) <- (-1x6x49x32xf32) + transpose_18 = paddle._C_ops.transpose(matmul_15, [0, 2, 1, 3]) + del matmul_15 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_19 = [slice_20, full_4, full_19] + del slice_20 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_18 = paddle._C_ops.stack(combine_19, 0) + del combine_19 + + # pd_op.reshape: (-1x49x192xf32) <- (-1x49x6x32xf32, 3xi64) + reshape_40 = paddle._C_ops.reshape(transpose_18, stack_18) + del stack_18, transpose_18 + + # pd_op.matmul: (-1x49x192xf32) <- (-1x49x192xf32, 192x192xf32) + matmul_16 = paddle._C_ops.matmul(reshape_40, parameter_269, False, False) + del parameter_269, reshape_40 + + # pd_op.add: (-1x49x192xf32) <- (-1x49x192xf32, 192xf32) + add_19 = paddle._C_ops.add(matmul_16, parameter_268) + del matmul_16, parameter_268 + + # pd_op.reshape: (-1x7x7x192xf32) <- (-1x49x192xf32, 4xi64) + reshape_41 = paddle._C_ops.reshape(add_19, full_int_array_33) + del add_19 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_35 = [-1, 4, 4, 7, 7, 192] + + # pd_op.reshape: (-1x4x4x7x7x192xf32) <- (-1x7x7x192xf32, 6xi64) + reshape_42 = paddle._C_ops.reshape(reshape_41, full_int_array_35) + del reshape_41 + + # pd_op.transpose: (-1x4x7x4x7x192xf32) <- (-1x4x4x7x7x192xf32) + transpose_19 = paddle._C_ops.transpose(reshape_42, [0, 1, 3, 2, 4, 5]) + del reshape_42 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_36 = [-1, 28, 28, 192] + + # pd_op.reshape: (-1x28x28x192xf32) <- (-1x4x7x4x7x192xf32, 4xi64) + reshape_43 = paddle._C_ops.reshape(transpose_19, full_int_array_36) + del transpose_19 + + # pd_op.full: (xi64) <- () + full_22 = paddle._C_ops.full( + [], float("784"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_20 = [slice_17, full_22, full_19] + del slice_17 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_19 = paddle._C_ops.stack(combine_20, 0) + del combine_20 + + # pd_op.reshape: (-1x784x192xf32) <- (-1x28x28x192xf32, 3xi64) + reshape_44 = paddle._C_ops.reshape(reshape_43, stack_19) + del reshape_43, stack_19 + + # pd_op.add: (-1x784x192xf32) <- (-1x-1x192xf32, -1x784x192xf32) + add_20 = paddle._C_ops.add(matmul_12, reshape_44) + del matmul_12, reshape_44 + + # pd_op.layer_norm: (-1x784x192xf32, -1x784xf32, -1x784xf32) <- (-1x784x192xf32, 192xf32, 192xf32) + layer_norm_21, layer_norm_22, layer_norm_23 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_20, parameter_267, parameter_266, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_266, parameter_267 + + # pd_op.matmul: (-1x784x768xf32) <- (-1x784x192xf32, 192x768xf32) + matmul_17 = paddle._C_ops.matmul(layer_norm_21, parameter_265, False, False) + del layer_norm_21, parameter_265 + + # pd_op.add: (-1x784x768xf32) <- (-1x784x768xf32, 768xf32) + add_21 = paddle._C_ops.add(matmul_17, parameter_264) + del matmul_17, parameter_264 + + # pd_op.gelu: (-1x784x768xf32) <- (-1x784x768xf32) + gelu_2 = paddle._C_ops.gelu(add_21, False) + del add_21 + + # pd_op.matmul: (-1x784x192xf32) <- (-1x784x768xf32, 768x192xf32) + matmul_18 = paddle._C_ops.matmul(gelu_2, parameter_263, False, False) + del gelu_2, parameter_263 + + # pd_op.add: (-1x784x192xf32) <- (-1x784x192xf32, 192xf32) + add_22 = paddle._C_ops.add(matmul_18, parameter_262) + del matmul_18, parameter_262 + + # pd_op.add: (-1x784x192xf32) <- (-1x784x192xf32, -1x784x192xf32) + add_23 = paddle._C_ops.add(add_20, add_22) + del add_20, add_22 + + # pd_op.shape64: (3xi64) <- (-1x784x192xf32) + shape64_15 = paddle._C_ops.shape64(add_23) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_24 = paddle._C_ops.slice( + shape64_15, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_15 + + # pd_op.layer_norm: (-1x784x192xf32, -1x784xf32, -1x784xf32) <- (-1x784x192xf32, 192xf32, 192xf32) + layer_norm_24, layer_norm_25, layer_norm_26 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_23, parameter_261, parameter_260, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_260, parameter_261 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_21 = [slice_24, full_18, full_18, full_19] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_20 = paddle._C_ops.stack(combine_21, 0) + del combine_21 + + # pd_op.reshape: (-1x28x28x192xf32) <- (-1x784x192xf32, 4xi64) + reshape_45 = paddle._C_ops.reshape(layer_norm_24, stack_20) + del layer_norm_24, stack_20 + + # pd_op.shape64: (4xi64) <- (-1x28x28x192xf32) + shape64_16 = paddle._C_ops.shape64(reshape_45) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_25 = paddle._C_ops.slice( + shape64_16, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_16 + + # pd_op.roll: (-1x28x28x192xf32) <- (-1x28x28x192xf32, 2xi64) + roll_2 = paddle._C_ops.roll(reshape_45, full_int_array_11, [1, 2]) + del reshape_45 + + # pd_op.shape64: (4xi64) <- (-1x28x28x192xf32) + shape64_17 = paddle._C_ops.shape64(roll_2) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_26 = paddle._C_ops.slice( + shape64_17, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_17 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_22 = [slice_26, full_20, full_3, full_20, full_3, full_19] + del slice_26 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_21 = paddle._C_ops.stack(combine_22, 0) + del combine_22 + + # pd_op.reshape: (-1x4x7x4x7x192xf32) <- (-1x28x28x192xf32, 6xi64) + reshape_46 = paddle._C_ops.reshape(roll_2, stack_21) + del roll_2, stack_21 + + # pd_op.transpose: (-1x4x4x7x7x192xf32) <- (-1x4x7x4x7x192xf32) + transpose_20 = paddle._C_ops.transpose(reshape_46, [0, 1, 3, 2, 4, 5]) + del reshape_46 + + # pd_op.reshape: (-1x7x7x192xf32) <- (-1x4x4x7x7x192xf32, 4xi64) + reshape_47 = paddle._C_ops.reshape(transpose_20, full_int_array_33) + del transpose_20 + + # pd_op.reshape: (-1x49x192xf32) <- (-1x7x7x192xf32, 3xi64) + reshape_48 = paddle._C_ops.reshape(reshape_47, full_int_array_34) + del full_int_array_34, reshape_47 + + # pd_op.full: (1x28x28x1xf32) <- () + full_23 = paddle._C_ops.full( + [1, 28, 28, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__20 = paddle._C_ops.set_value_( + full_23, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_23 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__21 = paddle._C_ops.set_value_( + set_value__20, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__20 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__22 = paddle._C_ops.set_value_( + set_value__21, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__21 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__23 = paddle._C_ops.set_value_( + set_value__22, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__22 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__24 = paddle._C_ops.set_value_( + set_value__23, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__23 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__25 = paddle._C_ops.set_value_( + set_value__24, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__24 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__26 = paddle._C_ops.set_value_( + set_value__25, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__25 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__27 = paddle._C_ops.set_value_( + set_value__26, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__26 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__1 = paddle._C_ops.set_value_( + set_value__27, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__27 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_37 = [1, 4, 7, 4, 7, 1] + + # pd_op.reshape: (1x4x7x4x7x1xf32) <- (1x28x28x1xf32, 6xi64) + reshape_49 = paddle._C_ops.reshape(set_value__1, full_int_array_37) + del full_int_array_37 + + # pd_op.transpose: (1x4x4x7x7x1xf32) <- (1x4x7x4x7x1xf32) + transpose_21 = paddle._C_ops.transpose(reshape_49, [0, 1, 3, 2, 4, 5]) + del reshape_49 + + # pd_op.reshape: (16x7x7x1xf32) <- (1x4x4x7x7x1xf32, 4xi64) + reshape_50 = paddle._C_ops.reshape(transpose_21, full_int_array_27) + del transpose_21 + + # pd_op.reshape: (16x49xf32) <- (16x7x7x1xf32, 2xi64) + reshape_51 = paddle._C_ops.reshape(reshape_50, full_int_array_28) + del reshape_50 + + # pd_op.unsqueeze: (16x1x49xf32) <- (16x49xf32, 1xi64) + unsqueeze_7 = paddle._C_ops.unsqueeze(reshape_51, full_int_array_1) + + # pd_op.unsqueeze: (16x49x1xf32) <- (16x49xf32, 1xi64) + unsqueeze_8 = paddle._C_ops.unsqueeze(reshape_51, full_int_array_5) + del reshape_51 + + # pd_op.subtract: (16x49x49xf32) <- (16x1x49xf32, 16x49x1xf32) + subtract_1 = paddle._C_ops.subtract(unsqueeze_7, unsqueeze_8) + del unsqueeze_7, unsqueeze_8 + + # pd_op.not_equal: (16x49x49xb) <- (16x49x49xf32, xf32) + not_equal_1 = paddle._C_ops.not_equal(subtract_1, full_10) + + # pd_op.full: (16x49x49xf32) <- () + full_24 = paddle._C_ops.full( + [16, 49, 49], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (16x49x49xf32) <- (16x49x49xb, 16x49x49xf32, 16x49x49xf32) + where_2 = paddle._C_ops.where(not_equal_1, full_24, subtract_1) + del full_24, not_equal_1, subtract_1 + + # pd_op.equal: (16x49x49xb) <- (16x49x49xf32, xf32) + equal_1 = paddle._C_ops.equal(where_2, full_10) + + # pd_op.full: (16x49x49xf32) <- () + full_25 = paddle._C_ops.full( + [16, 49, 49], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (16x49x49xf32) <- (16x49x49xb, 16x49x49xf32, 16x49x49xf32) + where_3 = paddle._C_ops.where(equal_1, full_25, where_2) + del equal_1, full_25, where_2 + + # pd_op.shape64: (3xi64) <- (-1x49x192xf32) + shape64_18 = paddle._C_ops.shape64(reshape_48) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_27 = paddle._C_ops.slice( + shape64_18, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_18 + + # pd_op.matmul: (-1x49x576xf32) <- (-1x49x192xf32, 192x576xf32) + matmul_19 = paddle._C_ops.matmul(reshape_48, parameter_259, False, False) + del parameter_259, reshape_48 + + # pd_op.add: (-1x49x576xf32) <- (-1x49x576xf32, 576xf32) + add_24 = paddle._C_ops.add(matmul_19, parameter_258) + del matmul_19, parameter_258 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_23 = [slice_27, full_4, full_5, full_21, full_6] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_22 = paddle._C_ops.stack(combine_23, 0) + del combine_23 + + # pd_op.reshape: (-1x49x3x6x32xf32) <- (-1x49x576xf32, 5xi64) + reshape_52 = paddle._C_ops.reshape(add_24, stack_22) + del add_24, stack_22 + + # pd_op.transpose: (3x-1x6x49x32xf32) <- (-1x49x3x6x32xf32) + transpose_22 = paddle._C_ops.transpose(reshape_52, [2, 0, 3, 1, 4]) + del reshape_52 + + # pd_op.slice: (-1x6x49x32xf32) <- (3x-1x6x49x32xf32, 1xi64, 1xi64) + slice_28 = paddle._C_ops.slice( + transpose_22, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x6x49x32xf32) <- (3x-1x6x49x32xf32, 1xi64, 1xi64) + slice_29 = paddle._C_ops.slice( + transpose_22, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x6x49x32xf32) <- (3x-1x6x49x32xf32, 1xi64, 1xi64) + slice_30 = paddle._C_ops.slice( + transpose_22, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_22 + + # pd_op.scale: (-1x6x49x32xf32) <- (-1x6x49x32xf32, 1xf32) + scale_3 = paddle._C_ops.scale(slice_28, full_7, float("0"), True) + del slice_28 + + # pd_op.transpose: (-1x6x32x49xf32) <- (-1x6x49x32xf32) + transpose_23 = paddle._C_ops.transpose(slice_29, [0, 1, 3, 2]) + del slice_29 + + # pd_op.matmul: (-1x6x49x49xf32) <- (-1x6x49x32xf32, -1x6x32x49xf32) + matmul_20 = paddle._C_ops.matmul(scale_3, transpose_23, False, False) + del scale_3, transpose_23 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_53 = paddle._C_ops.reshape(data_7, full_int_array_7) + del data_7 + + # pd_op.index_select: (2401x6xf32) <- (169x6xf32, 2401xi64) + index_select_3 = paddle._C_ops.index_select(data_8, reshape_53, 0) + del data_8, reshape_53 + + # pd_op.reshape: (49x49x6xf32) <- (2401x6xf32, 3xi64) + reshape_54 = paddle._C_ops.reshape(index_select_3, full_int_array_8) + del index_select_3 + + # pd_op.transpose: (6x49x49xf32) <- (49x49x6xf32) + transpose_24 = paddle._C_ops.transpose(reshape_54, [2, 0, 1]) + del reshape_54 + + # pd_op.unsqueeze: (1x6x49x49xf32) <- (6x49x49xf32, 1xi64) + unsqueeze_9 = paddle._C_ops.unsqueeze(transpose_24, full_int_array_0) + del transpose_24 + + # pd_op.add: (-1x6x49x49xf32) <- (-1x6x49x49xf32, 1x6x49x49xf32) + add_25 = paddle._C_ops.add(matmul_20, unsqueeze_9) + del matmul_20, unsqueeze_9 + + # pd_op.full: (xi64) <- () + full_26 = paddle._C_ops.full( + [], float("16"), paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_1 = paddle._C_ops.floor_divide(slice_27, full_26) + del full_26 + + # pd_op.full: (xi64) <- () + full_27 = paddle._C_ops.full( + [], float("16"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_24 = [floor_divide_1, full_27, full_21, full_4, full_4] + del floor_divide_1, full_27 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_23 = paddle._C_ops.stack(combine_24, 0) + del combine_24 + + # pd_op.reshape: (-1x16x6x49x49xf32) <- (-1x6x49x49xf32, 5xi64) + reshape_55 = paddle._C_ops.reshape(add_25, stack_23) + del add_25, stack_23 + + # pd_op.unsqueeze: (16x1x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_10 = paddle._C_ops.unsqueeze(where_3, full_int_array_1) + del where_3 + + # pd_op.unsqueeze: (1x16x1x49x49xf32) <- (16x1x49x49xf32, 1xi64) + unsqueeze_11 = paddle._C_ops.unsqueeze(unsqueeze_10, full_int_array_0) + del unsqueeze_10 + + # pd_op.add: (-1x16x6x49x49xf32) <- (-1x16x6x49x49xf32, 1x16x1x49x49xf32) + add_26 = paddle._C_ops.add(reshape_55, unsqueeze_11) + del reshape_55, unsqueeze_11 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_25 = [slice_27, full_21, full_4, full_4] + del full_21 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_24 = paddle._C_ops.stack(combine_25, 0) + del combine_25 + + # pd_op.reshape: (-1x6x49x49xf32) <- (-1x16x6x49x49xf32, 4xi64) + reshape_56 = paddle._C_ops.reshape(add_26, stack_24) + del add_26, stack_24 + + # pd_op.softmax: (-1x6x49x49xf32) <- (-1x6x49x49xf32) + softmax_3 = paddle._C_ops.softmax(reshape_56, -1) + del reshape_56 + + # pd_op.matmul: (-1x6x49x32xf32) <- (-1x6x49x49xf32, -1x6x49x32xf32) + matmul_21 = paddle._C_ops.matmul(softmax_3, slice_30, False, False) + del slice_30, softmax_3 + + # pd_op.transpose: (-1x49x6x32xf32) <- (-1x6x49x32xf32) + transpose_25 = paddle._C_ops.transpose(matmul_21, [0, 2, 1, 3]) + del matmul_21 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_26 = [slice_27, full_4, full_19] + del slice_27 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_25 = paddle._C_ops.stack(combine_26, 0) + del combine_26 + + # pd_op.reshape: (-1x49x192xf32) <- (-1x49x6x32xf32, 3xi64) + reshape_57 = paddle._C_ops.reshape(transpose_25, stack_25) + del stack_25, transpose_25 + + # pd_op.matmul: (-1x49x192xf32) <- (-1x49x192xf32, 192x192xf32) + matmul_22 = paddle._C_ops.matmul(reshape_57, parameter_257, False, False) + del parameter_257, reshape_57 + + # pd_op.add: (-1x49x192xf32) <- (-1x49x192xf32, 192xf32) + add_27 = paddle._C_ops.add(matmul_22, parameter_256) + del matmul_22, parameter_256 + + # pd_op.reshape: (-1x7x7x192xf32) <- (-1x49x192xf32, 4xi64) + reshape_58 = paddle._C_ops.reshape(add_27, full_int_array_33) + del add_27, full_int_array_33 + + # pd_op.reshape: (-1x4x4x7x7x192xf32) <- (-1x7x7x192xf32, 6xi64) + reshape_59 = paddle._C_ops.reshape(reshape_58, full_int_array_35) + del full_int_array_35, reshape_58 + + # pd_op.transpose: (-1x4x7x4x7x192xf32) <- (-1x4x4x7x7x192xf32) + transpose_26 = paddle._C_ops.transpose(reshape_59, [0, 1, 3, 2, 4, 5]) + del reshape_59 + + # pd_op.reshape: (-1x28x28x192xf32) <- (-1x4x7x4x7x192xf32, 4xi64) + reshape_60 = paddle._C_ops.reshape(transpose_26, full_int_array_36) + del full_int_array_36, transpose_26 + + # pd_op.roll: (-1x28x28x192xf32) <- (-1x28x28x192xf32, 2xi64) + roll_3 = paddle._C_ops.roll(reshape_60, full_int_array_29, [1, 2]) + del reshape_60 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_27 = [slice_24, full_22, full_19] + del full_22, slice_24 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_26 = paddle._C_ops.stack(combine_27, 0) + del combine_27 + + # pd_op.reshape: (-1x784x192xf32) <- (-1x28x28x192xf32, 3xi64) + reshape_61 = paddle._C_ops.reshape(roll_3, stack_26) + del roll_3, stack_26 + + # pd_op.add: (-1x784x192xf32) <- (-1x784x192xf32, -1x784x192xf32) + add_28 = paddle._C_ops.add(add_23, reshape_61) + del add_23, reshape_61 + + # pd_op.layer_norm: (-1x784x192xf32, -1x784xf32, -1x784xf32) <- (-1x784x192xf32, 192xf32, 192xf32) + layer_norm_27, layer_norm_28, layer_norm_29 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_28, parameter_255, parameter_254, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_254, parameter_255 + + # pd_op.matmul: (-1x784x768xf32) <- (-1x784x192xf32, 192x768xf32) + matmul_23 = paddle._C_ops.matmul(layer_norm_27, parameter_253, False, False) + del layer_norm_27, parameter_253 + + # pd_op.add: (-1x784x768xf32) <- (-1x784x768xf32, 768xf32) + add_29 = paddle._C_ops.add(matmul_23, parameter_252) + del matmul_23, parameter_252 + + # pd_op.gelu: (-1x784x768xf32) <- (-1x784x768xf32) + gelu_3 = paddle._C_ops.gelu(add_29, False) + del add_29 + + # pd_op.matmul: (-1x784x192xf32) <- (-1x784x768xf32, 768x192xf32) + matmul_24 = paddle._C_ops.matmul(gelu_3, parameter_251, False, False) + del gelu_3, parameter_251 + + # pd_op.add: (-1x784x192xf32) <- (-1x784x192xf32, 192xf32) + add_30 = paddle._C_ops.add(matmul_24, parameter_250) + del matmul_24, parameter_250 + + # pd_op.add: (-1x784x192xf32) <- (-1x784x192xf32, -1x784x192xf32) + add_31 = paddle._C_ops.add(add_28, add_30) + del add_28, add_30 + + # pd_op.shape64: (3xi64) <- (-1x784x192xf32) + shape64_19 = paddle._C_ops.shape64(add_31) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_31 = paddle._C_ops.slice( + shape64_19, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_19 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_28 = [slice_31, full_18, full_18, full_19] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_27 = paddle._C_ops.stack(combine_28, 0) + del combine_28 + + # pd_op.reshape: (-1x28x28x192xf32) <- (-1x784x192xf32, 4xi64) + reshape_62 = paddle._C_ops.reshape(add_31, stack_27) + del add_31, stack_27 + + # pd_op.strided_slice: (-1x14x14x192xf32) <- (-1x28x28x192xf32, 2xi64, 2xi64, 2xi64) + strided_slice_4 = paddle._C_ops.strided_slice( + reshape_62, [1, 2], full_int_array_12, full_int_array_25, full_int_array_30 + ) + + # pd_op.strided_slice: (-1x14x14x192xf32) <- (-1x28x28x192xf32, 2xi64, 2xi64, 2xi64) + strided_slice_5 = paddle._C_ops.strided_slice( + reshape_62, [1, 2], full_int_array_31, full_int_array_25, full_int_array_30 + ) + + # pd_op.strided_slice: (-1x14x14x192xf32) <- (-1x28x28x192xf32, 2xi64, 2xi64, 2xi64) + strided_slice_6 = paddle._C_ops.strided_slice( + reshape_62, [1, 2], full_int_array_32, full_int_array_25, full_int_array_30 + ) + + # pd_op.strided_slice: (-1x14x14x192xf32) <- (-1x28x28x192xf32, 2xi64, 2xi64, 2xi64) + strided_slice_7 = paddle._C_ops.strided_slice( + reshape_62, [1, 2], full_int_array_14, full_int_array_25, full_int_array_30 + ) + + # pd_op.shape64: (4xi64) <- (-1x28x28x192xf32) + shape64_20 = paddle._C_ops.shape64(reshape_62) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_32 = paddle._C_ops.slice( + shape64_20, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_20 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_29 = [slice_32, full_18, full_18, full_19] + del full_18, full_19, slice_32 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_28 = paddle._C_ops.stack(combine_29, 0) + del combine_29 + + # pd_op.reshape: (-1x28x28x192xf32) <- (-1x28x28x192xf32, 4xi64) + reshape_63 = paddle._C_ops.reshape(reshape_62, stack_28) + del reshape_62, stack_28 + + # builtin.combine: ([-1x14x14x192xf32, -1x14x14x192xf32, -1x14x14x192xf32, -1x14x14x192xf32]) <- (-1x14x14x192xf32, -1x14x14x192xf32, -1x14x14x192xf32, -1x14x14x192xf32) + combine_30 = [ + strided_slice_4, + strided_slice_5, + strided_slice_6, + strided_slice_7, + ] + del strided_slice_4, strided_slice_5, strided_slice_6, strided_slice_7 + + # pd_op.concat: (-1x14x14x768xf32) <- ([-1x14x14x192xf32, -1x14x14x192xf32, -1x14x14x192xf32, -1x14x14x192xf32], 1xi32) + concat_1 = paddle._C_ops.concat(combine_30, full_15) + del combine_30 + + # pd_op.full: (xi64) <- () + full_28 = paddle._C_ops.full( + [], float("768"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_31 = [slice_31, full_16, full_28] + del slice_31 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_29 = paddle._C_ops.stack(combine_31, 0) + del combine_31 + + # pd_op.reshape: (-1x-1x768xf32) <- (-1x14x14x768xf32, 3xi64) + reshape_64 = paddle._C_ops.reshape(concat_1, stack_29) + del concat_1, stack_29 + + # pd_op.layer_norm: (-1x-1x768xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x768xf32, 768xf32, 768xf32) + layer_norm_30, layer_norm_31, layer_norm_32 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + reshape_64, parameter_249, parameter_248, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_248, parameter_249, reshape_64 + + # pd_op.matmul: (-1x-1x384xf32) <- (-1x-1x768xf32, 768x384xf32) + matmul_25 = paddle._C_ops.matmul(layer_norm_30, parameter_247, False, False) + del layer_norm_30, parameter_247 + + # pd_op.shape64: (3xi64) <- (-1x-1x384xf32) + shape64_21 = paddle._C_ops.shape64(matmul_25) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_33 = paddle._C_ops.slice( + shape64_21, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_21 + + # pd_op.shape64: (3xi64) <- (-1x-1x384xf32) + shape64_22 = paddle._C_ops.shape64(matmul_25) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_34 = paddle._C_ops.slice( + shape64_22, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + del shape64_22 + + # pd_op.layer_norm: (-1x-1x384xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x384xf32, 384xf32, 384xf32) + layer_norm_33, layer_norm_34, layer_norm_35 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + matmul_25, parameter_246, parameter_245, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_245, parameter_246 + + # pd_op.full: (xi64) <- () + full_29 = paddle._C_ops.full( + [], float("14"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_32 = [slice_33, full_29, full_29, full_17] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_30 = paddle._C_ops.stack(combine_32, 0) + del combine_32 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x-1x384xf32, 4xi64) + reshape_65 = paddle._C_ops.reshape(layer_norm_33, stack_30) + del layer_norm_33, stack_30 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_23 = paddle._C_ops.shape64(reshape_65) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_35 = paddle._C_ops.slice( + shape64_23, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_23 + + # pd_op.full: (xi64) <- () + full_30 = paddle._C_ops.full( + [], float("2"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_33 = [slice_35, full_30, full_3, full_30, full_3, full_17] + del slice_35 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_31 = paddle._C_ops.stack(combine_33, 0) + del combine_33 + + # pd_op.reshape: (-1x2x7x2x7x384xf32) <- (-1x14x14x384xf32, 6xi64) + reshape_66 = paddle._C_ops.reshape(reshape_65, stack_31) + del reshape_65, stack_31 + + # pd_op.transpose: (-1x2x2x7x7x384xf32) <- (-1x2x7x2x7x384xf32) + transpose_27 = paddle._C_ops.transpose(reshape_66, [0, 1, 3, 2, 4, 5]) + del reshape_66 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_38 = [-1, 7, 7, 384] + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x2x2x7x7x384xf32, 4xi64) + reshape_67 = paddle._C_ops.reshape(transpose_27, full_int_array_38) + del transpose_27 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_39 = [-1, 49, 384] + + # pd_op.reshape: (-1x49x384xf32) <- (-1x7x7x384xf32, 3xi64) + reshape_68 = paddle._C_ops.reshape(reshape_67, full_int_array_39) + del reshape_67 + + # pd_op.shape64: (3xi64) <- (-1x49x384xf32) + shape64_24 = paddle._C_ops.shape64(reshape_68) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_36 = paddle._C_ops.slice( + shape64_24, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_24 + + # pd_op.matmul: (-1x49x1152xf32) <- (-1x49x384xf32, 384x1152xf32) + matmul_26 = paddle._C_ops.matmul(reshape_68, parameter_244, False, False) + del parameter_244, reshape_68 + + # pd_op.add: (-1x49x1152xf32) <- (-1x49x1152xf32, 1152xf32) + add_32 = paddle._C_ops.add(matmul_26, parameter_243) + del matmul_26, parameter_243 + + # pd_op.full: (xi64) <- () + full_31 = paddle._C_ops.full( + [], float("12"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_34 = [slice_36, full_4, full_5, full_31, full_6] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_32 = paddle._C_ops.stack(combine_34, 0) + del combine_34 + + # pd_op.reshape: (-1x49x3x12x32xf32) <- (-1x49x1152xf32, 5xi64) + reshape_69 = paddle._C_ops.reshape(add_32, stack_32) + del add_32, stack_32 + + # pd_op.transpose: (3x-1x12x49x32xf32) <- (-1x49x3x12x32xf32) + transpose_28 = paddle._C_ops.transpose(reshape_69, [2, 0, 3, 1, 4]) + del reshape_69 + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_37 = paddle._C_ops.slice( + transpose_28, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_38 = paddle._C_ops.slice( + transpose_28, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_39 = paddle._C_ops.slice( + transpose_28, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_28 + + # pd_op.scale: (-1x12x49x32xf32) <- (-1x12x49x32xf32, 1xf32) + scale_4 = paddle._C_ops.scale(slice_37, full_7, float("0"), True) + del slice_37 + + # pd_op.transpose: (-1x12x32x49xf32) <- (-1x12x49x32xf32) + transpose_29 = paddle._C_ops.transpose(slice_38, [0, 1, 3, 2]) + del slice_38 + + # pd_op.matmul: (-1x12x49x49xf32) <- (-1x12x49x32xf32, -1x12x32x49xf32) + matmul_27 = paddle._C_ops.matmul(scale_4, transpose_29, False, False) + del scale_4, transpose_29 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_70 = paddle._C_ops.reshape(data_9, full_int_array_7) + del data_9 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_4 = paddle._C_ops.index_select(data_10, reshape_70, 0) + del data_10, reshape_70 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_71 = paddle._C_ops.reshape(index_select_4, full_int_array_8) + del index_select_4 + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_30 = paddle._C_ops.transpose(reshape_71, [2, 0, 1]) + del reshape_71 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_12 = paddle._C_ops.unsqueeze(transpose_30, full_int_array_0) + del transpose_30 + + # pd_op.add: (-1x12x49x49xf32) <- (-1x12x49x49xf32, 1x12x49x49xf32) + add_33 = paddle._C_ops.add(matmul_27, unsqueeze_12) + del matmul_27, unsqueeze_12 + + # pd_op.softmax: (-1x12x49x49xf32) <- (-1x12x49x49xf32) + softmax_4 = paddle._C_ops.softmax(add_33, -1) + del add_33 + + # pd_op.matmul: (-1x12x49x32xf32) <- (-1x12x49x49xf32, -1x12x49x32xf32) + matmul_28 = paddle._C_ops.matmul(softmax_4, slice_39, False, False) + del slice_39, softmax_4 + + # pd_op.transpose: (-1x49x12x32xf32) <- (-1x12x49x32xf32) + transpose_31 = paddle._C_ops.transpose(matmul_28, [0, 2, 1, 3]) + del matmul_28 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_35 = [slice_36, full_4, full_17] + del slice_36 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_33 = paddle._C_ops.stack(combine_35, 0) + del combine_35 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x49x12x32xf32, 3xi64) + reshape_72 = paddle._C_ops.reshape(transpose_31, stack_33) + del stack_33, transpose_31 + + # pd_op.matmul: (-1x49x384xf32) <- (-1x49x384xf32, 384x384xf32) + matmul_29 = paddle._C_ops.matmul(reshape_72, parameter_242, False, False) + del parameter_242, reshape_72 + + # pd_op.add: (-1x49x384xf32) <- (-1x49x384xf32, 384xf32) + add_34 = paddle._C_ops.add(matmul_29, parameter_241) + del matmul_29, parameter_241 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x49x384xf32, 4xi64) + reshape_73 = paddle._C_ops.reshape(add_34, full_int_array_38) + del add_34 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_40 = [-1, 2, 2, 7, 7, 384] + + # pd_op.reshape: (-1x2x2x7x7x384xf32) <- (-1x7x7x384xf32, 6xi64) + reshape_74 = paddle._C_ops.reshape(reshape_73, full_int_array_40) + del reshape_73 + + # pd_op.transpose: (-1x2x7x2x7x384xf32) <- (-1x2x2x7x7x384xf32) + transpose_32 = paddle._C_ops.transpose(reshape_74, [0, 1, 3, 2, 4, 5]) + del reshape_74 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_41 = [-1, 14, 14, 384] + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x2x7x2x7x384xf32, 4xi64) + reshape_75 = paddle._C_ops.reshape(transpose_32, full_int_array_41) + del transpose_32 + + # pd_op.full: (xi64) <- () + full_32 = paddle._C_ops.full( + [], float("196"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_36 = [slice_33, full_32, full_17] + del slice_33 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_34 = paddle._C_ops.stack(combine_36, 0) + del combine_36 + + # pd_op.reshape: (-1x196x384xf32) <- (-1x14x14x384xf32, 3xi64) + reshape_76 = paddle._C_ops.reshape(reshape_75, stack_34) + del reshape_75, stack_34 + + # pd_op.add: (-1x196x384xf32) <- (-1x-1x384xf32, -1x196x384xf32) + add_35 = paddle._C_ops.add(matmul_25, reshape_76) + del matmul_25, reshape_76 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_36, layer_norm_37, layer_norm_38 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_35, parameter_240, parameter_239, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_239, parameter_240 + + # pd_op.matmul: (-1x196x1536xf32) <- (-1x196x384xf32, 384x1536xf32) + matmul_30 = paddle._C_ops.matmul(layer_norm_36, parameter_238, False, False) + del layer_norm_36, parameter_238 + + # pd_op.add: (-1x196x1536xf32) <- (-1x196x1536xf32, 1536xf32) + add_36 = paddle._C_ops.add(matmul_30, parameter_237) + del matmul_30, parameter_237 + + # pd_op.gelu: (-1x196x1536xf32) <- (-1x196x1536xf32) + gelu_4 = paddle._C_ops.gelu(add_36, False) + del add_36 + + # pd_op.matmul: (-1x196x384xf32) <- (-1x196x1536xf32, 1536x384xf32) + matmul_31 = paddle._C_ops.matmul(gelu_4, parameter_236, False, False) + del gelu_4, parameter_236 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, 384xf32) + add_37 = paddle._C_ops.add(matmul_31, parameter_235) + del matmul_31, parameter_235 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_38 = paddle._C_ops.add(add_35, add_37) + del add_35, add_37 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_25 = paddle._C_ops.shape64(add_38) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_40 = paddle._C_ops.slice( + shape64_25, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_25 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_39, layer_norm_40, layer_norm_41 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_38, parameter_234, parameter_233, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_233, parameter_234 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_37 = [slice_40, full_29, full_29, full_17] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_35 = paddle._C_ops.stack(combine_37, 0) + del combine_37 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x196x384xf32, 4xi64) + reshape_77 = paddle._C_ops.reshape(layer_norm_39, stack_35) + del layer_norm_39, stack_35 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_26 = paddle._C_ops.shape64(reshape_77) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_41 = paddle._C_ops.slice( + shape64_26, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_26 + + # pd_op.roll: (-1x14x14x384xf32) <- (-1x14x14x384xf32, 2xi64) + roll_4 = paddle._C_ops.roll(reshape_77, full_int_array_11, [1, 2]) + del reshape_77 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_27 = paddle._C_ops.shape64(roll_4) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_42 = paddle._C_ops.slice( + shape64_27, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_27 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_38 = [slice_42, full_30, full_3, full_30, full_3, full_17] + del slice_42 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_36 = paddle._C_ops.stack(combine_38, 0) + del combine_38 + + # pd_op.reshape: (-1x2x7x2x7x384xf32) <- (-1x14x14x384xf32, 6xi64) + reshape_78 = paddle._C_ops.reshape(roll_4, stack_36) + del roll_4, stack_36 + + # pd_op.transpose: (-1x2x2x7x7x384xf32) <- (-1x2x7x2x7x384xf32) + transpose_33 = paddle._C_ops.transpose(reshape_78, [0, 1, 3, 2, 4, 5]) + del reshape_78 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x2x2x7x7x384xf32, 4xi64) + reshape_79 = paddle._C_ops.reshape(transpose_33, full_int_array_38) + del transpose_33 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x7x7x384xf32, 3xi64) + reshape_80 = paddle._C_ops.reshape(reshape_79, full_int_array_39) + del reshape_79 + + # pd_op.full: (1x14x14x1xf32) <- () + full_33 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__28 = paddle._C_ops.set_value_( + full_33, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_33 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__29 = paddle._C_ops.set_value_( + set_value__28, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__28 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__30 = paddle._C_ops.set_value_( + set_value__29, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__29 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__31 = paddle._C_ops.set_value_( + set_value__30, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__30 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__32 = paddle._C_ops.set_value_( + set_value__31, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__31 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__33 = paddle._C_ops.set_value_( + set_value__32, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__32 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__34 = paddle._C_ops.set_value_( + set_value__33, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__33 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__35 = paddle._C_ops.set_value_( + set_value__34, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__34 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__2 = paddle._C_ops.set_value_( + set_value__35, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__35 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_42 = [1, 2, 7, 2, 7, 1] + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_81 = paddle._C_ops.reshape(set_value__2, full_int_array_42) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_34 = paddle._C_ops.transpose(reshape_81, [0, 1, 3, 2, 4, 5]) + del reshape_81 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_82 = paddle._C_ops.reshape(transpose_34, full_int_array_27) + del transpose_34 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_83 = paddle._C_ops.reshape(reshape_82, full_int_array_28) + del reshape_82 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_13 = paddle._C_ops.unsqueeze(reshape_83, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_14 = paddle._C_ops.unsqueeze(reshape_83, full_int_array_5) + del reshape_83 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_2 = paddle._C_ops.subtract(unsqueeze_13, unsqueeze_14) + del unsqueeze_13, unsqueeze_14 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_2 = paddle._C_ops.not_equal(subtract_2, full_10) + + # pd_op.full: (4x49x49xf32) <- () + full_34 = paddle._C_ops.full( + [4, 49, 49], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_4 = paddle._C_ops.where(not_equal_2, full_34, subtract_2) + del not_equal_2, subtract_2 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_2 = paddle._C_ops.equal(where_4, full_10) + + # pd_op.full: (4x49x49xf32) <- () + full_35 = paddle._C_ops.full( + [4, 49, 49], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_5 = paddle._C_ops.where(equal_2, full_35, where_4) + del equal_2, where_4 + + # pd_op.shape64: (3xi64) <- (-1x49x384xf32) + shape64_28 = paddle._C_ops.shape64(reshape_80) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_43 = paddle._C_ops.slice( + shape64_28, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_28 + + # pd_op.matmul: (-1x49x1152xf32) <- (-1x49x384xf32, 384x1152xf32) + matmul_32 = paddle._C_ops.matmul(reshape_80, parameter_232, False, False) + del parameter_232, reshape_80 + + # pd_op.add: (-1x49x1152xf32) <- (-1x49x1152xf32, 1152xf32) + add_39 = paddle._C_ops.add(matmul_32, parameter_231) + del matmul_32, parameter_231 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_39 = [slice_43, full_4, full_5, full_31, full_6] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_37 = paddle._C_ops.stack(combine_39, 0) + del combine_39 + + # pd_op.reshape: (-1x49x3x12x32xf32) <- (-1x49x1152xf32, 5xi64) + reshape_84 = paddle._C_ops.reshape(add_39, stack_37) + del add_39, stack_37 + + # pd_op.transpose: (3x-1x12x49x32xf32) <- (-1x49x3x12x32xf32) + transpose_35 = paddle._C_ops.transpose(reshape_84, [2, 0, 3, 1, 4]) + del reshape_84 + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_44 = paddle._C_ops.slice( + transpose_35, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_45 = paddle._C_ops.slice( + transpose_35, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_46 = paddle._C_ops.slice( + transpose_35, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_35 + + # pd_op.scale: (-1x12x49x32xf32) <- (-1x12x49x32xf32, 1xf32) + scale_5 = paddle._C_ops.scale(slice_44, full_7, float("0"), True) + del slice_44 + + # pd_op.transpose: (-1x12x32x49xf32) <- (-1x12x49x32xf32) + transpose_36 = paddle._C_ops.transpose(slice_45, [0, 1, 3, 2]) + del slice_45 + + # pd_op.matmul: (-1x12x49x49xf32) <- (-1x12x49x32xf32, -1x12x32x49xf32) + matmul_33 = paddle._C_ops.matmul(scale_5, transpose_36, False, False) + del scale_5, transpose_36 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_85 = paddle._C_ops.reshape(data_11, full_int_array_7) + del data_11 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_5 = paddle._C_ops.index_select(data_12, reshape_85, 0) + del data_12, reshape_85 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_86 = paddle._C_ops.reshape(index_select_5, full_int_array_8) + del index_select_5 + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_37 = paddle._C_ops.transpose(reshape_86, [2, 0, 1]) + del reshape_86 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_15 = paddle._C_ops.unsqueeze(transpose_37, full_int_array_0) + del transpose_37 + + # pd_op.add: (-1x12x49x49xf32) <- (-1x12x49x49xf32, 1x12x49x49xf32) + add_40 = paddle._C_ops.add(matmul_33, unsqueeze_15) + del matmul_33, unsqueeze_15 + + # pd_op.full: (xi64) <- () + full_36 = paddle._C_ops.full( + [], float("4"), paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_2 = paddle._C_ops.floor_divide(slice_43, full_36) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_40 = [floor_divide_2, full_20, full_31, full_4, full_4] + del floor_divide_2 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_38 = paddle._C_ops.stack(combine_40, 0) + del combine_40 + + # pd_op.reshape: (-1x4x12x49x49xf32) <- (-1x12x49x49xf32, 5xi64) + reshape_87 = paddle._C_ops.reshape(add_40, stack_38) + del add_40, stack_38 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_16 = paddle._C_ops.unsqueeze(where_5, full_int_array_1) + del where_5 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_17 = paddle._C_ops.unsqueeze(unsqueeze_16, full_int_array_0) + del unsqueeze_16 + + # pd_op.add: (-1x4x12x49x49xf32) <- (-1x4x12x49x49xf32, 1x4x1x49x49xf32) + add_41 = paddle._C_ops.add(reshape_87, unsqueeze_17) + del reshape_87, unsqueeze_17 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_41 = [slice_43, full_31, full_4, full_4] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_39 = paddle._C_ops.stack(combine_41, 0) + del combine_41 + + # pd_op.reshape: (-1x12x49x49xf32) <- (-1x4x12x49x49xf32, 4xi64) + reshape_88 = paddle._C_ops.reshape(add_41, stack_39) + del add_41, stack_39 + + # pd_op.softmax: (-1x12x49x49xf32) <- (-1x12x49x49xf32) + softmax_5 = paddle._C_ops.softmax(reshape_88, -1) + del reshape_88 + + # pd_op.matmul: (-1x12x49x32xf32) <- (-1x12x49x49xf32, -1x12x49x32xf32) + matmul_34 = paddle._C_ops.matmul(softmax_5, slice_46, False, False) + del slice_46, softmax_5 + + # pd_op.transpose: (-1x49x12x32xf32) <- (-1x12x49x32xf32) + transpose_38 = paddle._C_ops.transpose(matmul_34, [0, 2, 1, 3]) + del matmul_34 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_42 = [slice_43, full_4, full_17] + del slice_43 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_40 = paddle._C_ops.stack(combine_42, 0) + del combine_42 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x49x12x32xf32, 3xi64) + reshape_89 = paddle._C_ops.reshape(transpose_38, stack_40) + del stack_40, transpose_38 + + # pd_op.matmul: (-1x49x384xf32) <- (-1x49x384xf32, 384x384xf32) + matmul_35 = paddle._C_ops.matmul(reshape_89, parameter_230, False, False) + del parameter_230, reshape_89 + + # pd_op.add: (-1x49x384xf32) <- (-1x49x384xf32, 384xf32) + add_42 = paddle._C_ops.add(matmul_35, parameter_229) + del matmul_35, parameter_229 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x49x384xf32, 4xi64) + reshape_90 = paddle._C_ops.reshape(add_42, full_int_array_38) + del add_42 + + # pd_op.reshape: (-1x2x2x7x7x384xf32) <- (-1x7x7x384xf32, 6xi64) + reshape_91 = paddle._C_ops.reshape(reshape_90, full_int_array_40) + del reshape_90 + + # pd_op.transpose: (-1x2x7x2x7x384xf32) <- (-1x2x2x7x7x384xf32) + transpose_39 = paddle._C_ops.transpose(reshape_91, [0, 1, 3, 2, 4, 5]) + del reshape_91 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x2x7x2x7x384xf32, 4xi64) + reshape_92 = paddle._C_ops.reshape(transpose_39, full_int_array_41) + del transpose_39 + + # pd_op.roll: (-1x14x14x384xf32) <- (-1x14x14x384xf32, 2xi64) + roll_5 = paddle._C_ops.roll(reshape_92, full_int_array_29, [1, 2]) + del reshape_92 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_43 = [slice_40, full_32, full_17] + del slice_40 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_41 = paddle._C_ops.stack(combine_43, 0) + del combine_43 + + # pd_op.reshape: (-1x196x384xf32) <- (-1x14x14x384xf32, 3xi64) + reshape_93 = paddle._C_ops.reshape(roll_5, stack_41) + del roll_5, stack_41 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_43 = paddle._C_ops.add(add_38, reshape_93) + del add_38, reshape_93 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_42, layer_norm_43, layer_norm_44 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_43, parameter_228, parameter_227, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_227, parameter_228 + + # pd_op.matmul: (-1x196x1536xf32) <- (-1x196x384xf32, 384x1536xf32) + matmul_36 = paddle._C_ops.matmul(layer_norm_42, parameter_226, False, False) + del layer_norm_42, parameter_226 + + # pd_op.add: (-1x196x1536xf32) <- (-1x196x1536xf32, 1536xf32) + add_44 = paddle._C_ops.add(matmul_36, parameter_225) + del matmul_36, parameter_225 + + # pd_op.gelu: (-1x196x1536xf32) <- (-1x196x1536xf32) + gelu_5 = paddle._C_ops.gelu(add_44, False) + del add_44 + + # pd_op.matmul: (-1x196x384xf32) <- (-1x196x1536xf32, 1536x384xf32) + matmul_37 = paddle._C_ops.matmul(gelu_5, parameter_224, False, False) + del gelu_5, parameter_224 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, 384xf32) + add_45 = paddle._C_ops.add(matmul_37, parameter_223) + del matmul_37, parameter_223 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_46 = paddle._C_ops.add(add_43, add_45) + del add_43, add_45 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_29 = paddle._C_ops.shape64(add_46) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_47 = paddle._C_ops.slice( + shape64_29, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_29 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_45, layer_norm_46, layer_norm_47 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_46, parameter_222, parameter_221, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_221, parameter_222 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_44 = [slice_47, full_29, full_29, full_17] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_42 = paddle._C_ops.stack(combine_44, 0) + del combine_44 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x196x384xf32, 4xi64) + reshape_94 = paddle._C_ops.reshape(layer_norm_45, stack_42) + del layer_norm_45, stack_42 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_30 = paddle._C_ops.shape64(reshape_94) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_48 = paddle._C_ops.slice( + shape64_30, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_30 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_45 = [slice_48, full_30, full_3, full_30, full_3, full_17] + del slice_48 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_43 = paddle._C_ops.stack(combine_45, 0) + del combine_45 + + # pd_op.reshape: (-1x2x7x2x7x384xf32) <- (-1x14x14x384xf32, 6xi64) + reshape_95 = paddle._C_ops.reshape(reshape_94, stack_43) + del reshape_94, stack_43 + + # pd_op.transpose: (-1x2x2x7x7x384xf32) <- (-1x2x7x2x7x384xf32) + transpose_40 = paddle._C_ops.transpose(reshape_95, [0, 1, 3, 2, 4, 5]) + del reshape_95 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x2x2x7x7x384xf32, 4xi64) + reshape_96 = paddle._C_ops.reshape(transpose_40, full_int_array_38) + del transpose_40 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x7x7x384xf32, 3xi64) + reshape_97 = paddle._C_ops.reshape(reshape_96, full_int_array_39) + del reshape_96 + + # pd_op.shape64: (3xi64) <- (-1x49x384xf32) + shape64_31 = paddle._C_ops.shape64(reshape_97) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_49 = paddle._C_ops.slice( + shape64_31, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_31 + + # pd_op.matmul: (-1x49x1152xf32) <- (-1x49x384xf32, 384x1152xf32) + matmul_38 = paddle._C_ops.matmul(reshape_97, parameter_220, False, False) + del parameter_220, reshape_97 + + # pd_op.add: (-1x49x1152xf32) <- (-1x49x1152xf32, 1152xf32) + add_47 = paddle._C_ops.add(matmul_38, parameter_219) + del matmul_38, parameter_219 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_46 = [slice_49, full_4, full_5, full_31, full_6] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_44 = paddle._C_ops.stack(combine_46, 0) + del combine_46 + + # pd_op.reshape: (-1x49x3x12x32xf32) <- (-1x49x1152xf32, 5xi64) + reshape_98 = paddle._C_ops.reshape(add_47, stack_44) + del add_47, stack_44 + + # pd_op.transpose: (3x-1x12x49x32xf32) <- (-1x49x3x12x32xf32) + transpose_41 = paddle._C_ops.transpose(reshape_98, [2, 0, 3, 1, 4]) + del reshape_98 + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_50 = paddle._C_ops.slice( + transpose_41, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_51 = paddle._C_ops.slice( + transpose_41, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_52 = paddle._C_ops.slice( + transpose_41, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_41 + + # pd_op.scale: (-1x12x49x32xf32) <- (-1x12x49x32xf32, 1xf32) + scale_6 = paddle._C_ops.scale(slice_50, full_7, float("0"), True) + del slice_50 + + # pd_op.transpose: (-1x12x32x49xf32) <- (-1x12x49x32xf32) + transpose_42 = paddle._C_ops.transpose(slice_51, [0, 1, 3, 2]) + del slice_51 + + # pd_op.matmul: (-1x12x49x49xf32) <- (-1x12x49x32xf32, -1x12x32x49xf32) + matmul_39 = paddle._C_ops.matmul(scale_6, transpose_42, False, False) + del scale_6, transpose_42 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_99 = paddle._C_ops.reshape(data_13, full_int_array_7) + del data_13 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_6 = paddle._C_ops.index_select(data_14, reshape_99, 0) + del data_14, reshape_99 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_100 = paddle._C_ops.reshape(index_select_6, full_int_array_8) + del index_select_6 + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_43 = paddle._C_ops.transpose(reshape_100, [2, 0, 1]) + del reshape_100 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_18 = paddle._C_ops.unsqueeze(transpose_43, full_int_array_0) + del transpose_43 + + # pd_op.add: (-1x12x49x49xf32) <- (-1x12x49x49xf32, 1x12x49x49xf32) + add_48 = paddle._C_ops.add(matmul_39, unsqueeze_18) + del matmul_39, unsqueeze_18 + + # pd_op.softmax: (-1x12x49x49xf32) <- (-1x12x49x49xf32) + softmax_6 = paddle._C_ops.softmax(add_48, -1) + del add_48 + + # pd_op.matmul: (-1x12x49x32xf32) <- (-1x12x49x49xf32, -1x12x49x32xf32) + matmul_40 = paddle._C_ops.matmul(softmax_6, slice_52, False, False) + del slice_52, softmax_6 + + # pd_op.transpose: (-1x49x12x32xf32) <- (-1x12x49x32xf32) + transpose_44 = paddle._C_ops.transpose(matmul_40, [0, 2, 1, 3]) + del matmul_40 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_47 = [slice_49, full_4, full_17] + del slice_49 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_45 = paddle._C_ops.stack(combine_47, 0) + del combine_47 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x49x12x32xf32, 3xi64) + reshape_101 = paddle._C_ops.reshape(transpose_44, stack_45) + del stack_45, transpose_44 + + # pd_op.matmul: (-1x49x384xf32) <- (-1x49x384xf32, 384x384xf32) + matmul_41 = paddle._C_ops.matmul(reshape_101, parameter_218, False, False) + del parameter_218, reshape_101 + + # pd_op.add: (-1x49x384xf32) <- (-1x49x384xf32, 384xf32) + add_49 = paddle._C_ops.add(matmul_41, parameter_217) + del matmul_41, parameter_217 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x49x384xf32, 4xi64) + reshape_102 = paddle._C_ops.reshape(add_49, full_int_array_38) + del add_49 + + # pd_op.reshape: (-1x2x2x7x7x384xf32) <- (-1x7x7x384xf32, 6xi64) + reshape_103 = paddle._C_ops.reshape(reshape_102, full_int_array_40) + del reshape_102 + + # pd_op.transpose: (-1x2x7x2x7x384xf32) <- (-1x2x2x7x7x384xf32) + transpose_45 = paddle._C_ops.transpose(reshape_103, [0, 1, 3, 2, 4, 5]) + del reshape_103 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x2x7x2x7x384xf32, 4xi64) + reshape_104 = paddle._C_ops.reshape(transpose_45, full_int_array_41) + del transpose_45 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_48 = [slice_47, full_32, full_17] + del slice_47 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_46 = paddle._C_ops.stack(combine_48, 0) + del combine_48 + + # pd_op.reshape: (-1x196x384xf32) <- (-1x14x14x384xf32, 3xi64) + reshape_105 = paddle._C_ops.reshape(reshape_104, stack_46) + del reshape_104, stack_46 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_50 = paddle._C_ops.add(add_46, reshape_105) + del add_46, reshape_105 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_48, layer_norm_49, layer_norm_50 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_50, parameter_216, parameter_215, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_215, parameter_216 + + # pd_op.matmul: (-1x196x1536xf32) <- (-1x196x384xf32, 384x1536xf32) + matmul_42 = paddle._C_ops.matmul(layer_norm_48, parameter_214, False, False) + del layer_norm_48, parameter_214 + + # pd_op.add: (-1x196x1536xf32) <- (-1x196x1536xf32, 1536xf32) + add_51 = paddle._C_ops.add(matmul_42, parameter_213) + del matmul_42, parameter_213 + + # pd_op.gelu: (-1x196x1536xf32) <- (-1x196x1536xf32) + gelu_6 = paddle._C_ops.gelu(add_51, False) + del add_51 + + # pd_op.matmul: (-1x196x384xf32) <- (-1x196x1536xf32, 1536x384xf32) + matmul_43 = paddle._C_ops.matmul(gelu_6, parameter_212, False, False) + del gelu_6, parameter_212 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, 384xf32) + add_52 = paddle._C_ops.add(matmul_43, parameter_211) + del matmul_43, parameter_211 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_53 = paddle._C_ops.add(add_50, add_52) + del add_50, add_52 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_32 = paddle._C_ops.shape64(add_53) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_53 = paddle._C_ops.slice( + shape64_32, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_32 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_51, layer_norm_52, layer_norm_53 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_53, parameter_210, parameter_209, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_209, parameter_210 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_49 = [slice_53, full_29, full_29, full_17] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_47 = paddle._C_ops.stack(combine_49, 0) + del combine_49 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x196x384xf32, 4xi64) + reshape_106 = paddle._C_ops.reshape(layer_norm_51, stack_47) + del layer_norm_51, stack_47 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_33 = paddle._C_ops.shape64(reshape_106) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_54 = paddle._C_ops.slice( + shape64_33, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_33 + + # pd_op.roll: (-1x14x14x384xf32) <- (-1x14x14x384xf32, 2xi64) + roll_6 = paddle._C_ops.roll(reshape_106, full_int_array_11, [1, 2]) + del reshape_106 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_34 = paddle._C_ops.shape64(roll_6) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_55 = paddle._C_ops.slice( + shape64_34, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_34 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_50 = [slice_55, full_30, full_3, full_30, full_3, full_17] + del slice_55 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_48 = paddle._C_ops.stack(combine_50, 0) + del combine_50 + + # pd_op.reshape: (-1x2x7x2x7x384xf32) <- (-1x14x14x384xf32, 6xi64) + reshape_107 = paddle._C_ops.reshape(roll_6, stack_48) + del roll_6, stack_48 + + # pd_op.transpose: (-1x2x2x7x7x384xf32) <- (-1x2x7x2x7x384xf32) + transpose_46 = paddle._C_ops.transpose(reshape_107, [0, 1, 3, 2, 4, 5]) + del reshape_107 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x2x2x7x7x384xf32, 4xi64) + reshape_108 = paddle._C_ops.reshape(transpose_46, full_int_array_38) + del transpose_46 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x7x7x384xf32, 3xi64) + reshape_109 = paddle._C_ops.reshape(reshape_108, full_int_array_39) + del reshape_108 + + # pd_op.full: (1x14x14x1xf32) <- () + full_37 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__36 = paddle._C_ops.set_value_( + full_37, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_37 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__37 = paddle._C_ops.set_value_( + set_value__36, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__36 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__38 = paddle._C_ops.set_value_( + set_value__37, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__37 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__39 = paddle._C_ops.set_value_( + set_value__38, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__38 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__40 = paddle._C_ops.set_value_( + set_value__39, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__39 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__41 = paddle._C_ops.set_value_( + set_value__40, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__40 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__42 = paddle._C_ops.set_value_( + set_value__41, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__41 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__43 = paddle._C_ops.set_value_( + set_value__42, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__42 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__3 = paddle._C_ops.set_value_( + set_value__43, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__43 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_110 = paddle._C_ops.reshape(set_value__3, full_int_array_42) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_47 = paddle._C_ops.transpose(reshape_110, [0, 1, 3, 2, 4, 5]) + del reshape_110 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_111 = paddle._C_ops.reshape(transpose_47, full_int_array_27) + del transpose_47 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_112 = paddle._C_ops.reshape(reshape_111, full_int_array_28) + del reshape_111 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_19 = paddle._C_ops.unsqueeze(reshape_112, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_20 = paddle._C_ops.unsqueeze(reshape_112, full_int_array_5) + del reshape_112 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_3 = paddle._C_ops.subtract(unsqueeze_19, unsqueeze_20) + del unsqueeze_19, unsqueeze_20 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_3 = paddle._C_ops.not_equal(subtract_3, full_10) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_6 = paddle._C_ops.where(not_equal_3, full_34, subtract_3) + del not_equal_3, subtract_3 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_3 = paddle._C_ops.equal(where_6, full_10) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_7 = paddle._C_ops.where(equal_3, full_35, where_6) + del equal_3, where_6 + + # pd_op.shape64: (3xi64) <- (-1x49x384xf32) + shape64_35 = paddle._C_ops.shape64(reshape_109) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_56 = paddle._C_ops.slice( + shape64_35, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_35 + + # pd_op.matmul: (-1x49x1152xf32) <- (-1x49x384xf32, 384x1152xf32) + matmul_44 = paddle._C_ops.matmul(reshape_109, parameter_208, False, False) + del parameter_208, reshape_109 + + # pd_op.add: (-1x49x1152xf32) <- (-1x49x1152xf32, 1152xf32) + add_54 = paddle._C_ops.add(matmul_44, parameter_207) + del matmul_44, parameter_207 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_51 = [slice_56, full_4, full_5, full_31, full_6] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_49 = paddle._C_ops.stack(combine_51, 0) + del combine_51 + + # pd_op.reshape: (-1x49x3x12x32xf32) <- (-1x49x1152xf32, 5xi64) + reshape_113 = paddle._C_ops.reshape(add_54, stack_49) + del add_54, stack_49 + + # pd_op.transpose: (3x-1x12x49x32xf32) <- (-1x49x3x12x32xf32) + transpose_48 = paddle._C_ops.transpose(reshape_113, [2, 0, 3, 1, 4]) + del reshape_113 + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_57 = paddle._C_ops.slice( + transpose_48, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_58 = paddle._C_ops.slice( + transpose_48, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_59 = paddle._C_ops.slice( + transpose_48, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_48 + + # pd_op.scale: (-1x12x49x32xf32) <- (-1x12x49x32xf32, 1xf32) + scale_7 = paddle._C_ops.scale(slice_57, full_7, float("0"), True) + del slice_57 + + # pd_op.transpose: (-1x12x32x49xf32) <- (-1x12x49x32xf32) + transpose_49 = paddle._C_ops.transpose(slice_58, [0, 1, 3, 2]) + del slice_58 + + # pd_op.matmul: (-1x12x49x49xf32) <- (-1x12x49x32xf32, -1x12x32x49xf32) + matmul_45 = paddle._C_ops.matmul(scale_7, transpose_49, False, False) + del scale_7, transpose_49 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_114 = paddle._C_ops.reshape(data_15, full_int_array_7) + del data_15 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_7 = paddle._C_ops.index_select(data_16, reshape_114, 0) + del data_16, reshape_114 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_115 = paddle._C_ops.reshape(index_select_7, full_int_array_8) + del index_select_7 + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_50 = paddle._C_ops.transpose(reshape_115, [2, 0, 1]) + del reshape_115 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_21 = paddle._C_ops.unsqueeze(transpose_50, full_int_array_0) + del transpose_50 + + # pd_op.add: (-1x12x49x49xf32) <- (-1x12x49x49xf32, 1x12x49x49xf32) + add_55 = paddle._C_ops.add(matmul_45, unsqueeze_21) + del matmul_45, unsqueeze_21 + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_3 = paddle._C_ops.floor_divide(slice_56, full_36) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_52 = [floor_divide_3, full_20, full_31, full_4, full_4] + del floor_divide_3 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_50 = paddle._C_ops.stack(combine_52, 0) + del combine_52 + + # pd_op.reshape: (-1x4x12x49x49xf32) <- (-1x12x49x49xf32, 5xi64) + reshape_116 = paddle._C_ops.reshape(add_55, stack_50) + del add_55, stack_50 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_22 = paddle._C_ops.unsqueeze(where_7, full_int_array_1) + del where_7 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_23 = paddle._C_ops.unsqueeze(unsqueeze_22, full_int_array_0) + del unsqueeze_22 + + # pd_op.add: (-1x4x12x49x49xf32) <- (-1x4x12x49x49xf32, 1x4x1x49x49xf32) + add_56 = paddle._C_ops.add(reshape_116, unsqueeze_23) + del reshape_116, unsqueeze_23 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_53 = [slice_56, full_31, full_4, full_4] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_51 = paddle._C_ops.stack(combine_53, 0) + del combine_53 + + # pd_op.reshape: (-1x12x49x49xf32) <- (-1x4x12x49x49xf32, 4xi64) + reshape_117 = paddle._C_ops.reshape(add_56, stack_51) + del add_56, stack_51 + + # pd_op.softmax: (-1x12x49x49xf32) <- (-1x12x49x49xf32) + softmax_7 = paddle._C_ops.softmax(reshape_117, -1) + del reshape_117 + + # pd_op.matmul: (-1x12x49x32xf32) <- (-1x12x49x49xf32, -1x12x49x32xf32) + matmul_46 = paddle._C_ops.matmul(softmax_7, slice_59, False, False) + del slice_59, softmax_7 + + # pd_op.transpose: (-1x49x12x32xf32) <- (-1x12x49x32xf32) + transpose_51 = paddle._C_ops.transpose(matmul_46, [0, 2, 1, 3]) + del matmul_46 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_54 = [slice_56, full_4, full_17] + del slice_56 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_52 = paddle._C_ops.stack(combine_54, 0) + del combine_54 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x49x12x32xf32, 3xi64) + reshape_118 = paddle._C_ops.reshape(transpose_51, stack_52) + del stack_52, transpose_51 + + # pd_op.matmul: (-1x49x384xf32) <- (-1x49x384xf32, 384x384xf32) + matmul_47 = paddle._C_ops.matmul(reshape_118, parameter_206, False, False) + del parameter_206, reshape_118 + + # pd_op.add: (-1x49x384xf32) <- (-1x49x384xf32, 384xf32) + add_57 = paddle._C_ops.add(matmul_47, parameter_205) + del matmul_47, parameter_205 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x49x384xf32, 4xi64) + reshape_119 = paddle._C_ops.reshape(add_57, full_int_array_38) + del add_57 + + # pd_op.reshape: (-1x2x2x7x7x384xf32) <- (-1x7x7x384xf32, 6xi64) + reshape_120 = paddle._C_ops.reshape(reshape_119, full_int_array_40) + del reshape_119 + + # pd_op.transpose: (-1x2x7x2x7x384xf32) <- (-1x2x2x7x7x384xf32) + transpose_52 = paddle._C_ops.transpose(reshape_120, [0, 1, 3, 2, 4, 5]) + del reshape_120 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x2x7x2x7x384xf32, 4xi64) + reshape_121 = paddle._C_ops.reshape(transpose_52, full_int_array_41) + del transpose_52 + + # pd_op.roll: (-1x14x14x384xf32) <- (-1x14x14x384xf32, 2xi64) + roll_7 = paddle._C_ops.roll(reshape_121, full_int_array_29, [1, 2]) + del reshape_121 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_55 = [slice_53, full_32, full_17] + del slice_53 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_53 = paddle._C_ops.stack(combine_55, 0) + del combine_55 + + # pd_op.reshape: (-1x196x384xf32) <- (-1x14x14x384xf32, 3xi64) + reshape_122 = paddle._C_ops.reshape(roll_7, stack_53) + del roll_7, stack_53 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_58 = paddle._C_ops.add(add_53, reshape_122) + del add_53, reshape_122 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_54, layer_norm_55, layer_norm_56 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_58, parameter_204, parameter_203, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_203, parameter_204 + + # pd_op.matmul: (-1x196x1536xf32) <- (-1x196x384xf32, 384x1536xf32) + matmul_48 = paddle._C_ops.matmul(layer_norm_54, parameter_202, False, False) + del layer_norm_54, parameter_202 + + # pd_op.add: (-1x196x1536xf32) <- (-1x196x1536xf32, 1536xf32) + add_59 = paddle._C_ops.add(matmul_48, parameter_201) + del matmul_48, parameter_201 + + # pd_op.gelu: (-1x196x1536xf32) <- (-1x196x1536xf32) + gelu_7 = paddle._C_ops.gelu(add_59, False) + del add_59 + + # pd_op.matmul: (-1x196x384xf32) <- (-1x196x1536xf32, 1536x384xf32) + matmul_49 = paddle._C_ops.matmul(gelu_7, parameter_200, False, False) + del gelu_7, parameter_200 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, 384xf32) + add_60 = paddle._C_ops.add(matmul_49, parameter_199) + del matmul_49, parameter_199 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_61 = paddle._C_ops.add(add_58, add_60) + del add_58, add_60 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_36 = paddle._C_ops.shape64(add_61) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_60 = paddle._C_ops.slice( + shape64_36, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_36 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_57, layer_norm_58, layer_norm_59 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_61, parameter_198, parameter_197, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_197, parameter_198 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_56 = [slice_60, full_29, full_29, full_17] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_54 = paddle._C_ops.stack(combine_56, 0) + del combine_56 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x196x384xf32, 4xi64) + reshape_123 = paddle._C_ops.reshape(layer_norm_57, stack_54) + del layer_norm_57, stack_54 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_37 = paddle._C_ops.shape64(reshape_123) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_61 = paddle._C_ops.slice( + shape64_37, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_37 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_57 = [slice_61, full_30, full_3, full_30, full_3, full_17] + del slice_61 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_55 = paddle._C_ops.stack(combine_57, 0) + del combine_57 + + # pd_op.reshape: (-1x2x7x2x7x384xf32) <- (-1x14x14x384xf32, 6xi64) + reshape_124 = paddle._C_ops.reshape(reshape_123, stack_55) + del reshape_123, stack_55 + + # pd_op.transpose: (-1x2x2x7x7x384xf32) <- (-1x2x7x2x7x384xf32) + transpose_53 = paddle._C_ops.transpose(reshape_124, [0, 1, 3, 2, 4, 5]) + del reshape_124 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x2x2x7x7x384xf32, 4xi64) + reshape_125 = paddle._C_ops.reshape(transpose_53, full_int_array_38) + del transpose_53 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x7x7x384xf32, 3xi64) + reshape_126 = paddle._C_ops.reshape(reshape_125, full_int_array_39) + del reshape_125 + + # pd_op.shape64: (3xi64) <- (-1x49x384xf32) + shape64_38 = paddle._C_ops.shape64(reshape_126) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_62 = paddle._C_ops.slice( + shape64_38, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_38 + + # pd_op.matmul: (-1x49x1152xf32) <- (-1x49x384xf32, 384x1152xf32) + matmul_50 = paddle._C_ops.matmul(reshape_126, parameter_196, False, False) + del parameter_196, reshape_126 + + # pd_op.add: (-1x49x1152xf32) <- (-1x49x1152xf32, 1152xf32) + add_62 = paddle._C_ops.add(matmul_50, parameter_195) + del matmul_50, parameter_195 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_58 = [slice_62, full_4, full_5, full_31, full_6] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_56 = paddle._C_ops.stack(combine_58, 0) + del combine_58 + + # pd_op.reshape: (-1x49x3x12x32xf32) <- (-1x49x1152xf32, 5xi64) + reshape_127 = paddle._C_ops.reshape(add_62, stack_56) + del add_62, stack_56 + + # pd_op.transpose: (3x-1x12x49x32xf32) <- (-1x49x3x12x32xf32) + transpose_54 = paddle._C_ops.transpose(reshape_127, [2, 0, 3, 1, 4]) + del reshape_127 + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_63 = paddle._C_ops.slice( + transpose_54, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_64 = paddle._C_ops.slice( + transpose_54, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_65 = paddle._C_ops.slice( + transpose_54, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_54 + + # pd_op.scale: (-1x12x49x32xf32) <- (-1x12x49x32xf32, 1xf32) + scale_8 = paddle._C_ops.scale(slice_63, full_7, float("0"), True) + del slice_63 + + # pd_op.transpose: (-1x12x32x49xf32) <- (-1x12x49x32xf32) + transpose_55 = paddle._C_ops.transpose(slice_64, [0, 1, 3, 2]) + del slice_64 + + # pd_op.matmul: (-1x12x49x49xf32) <- (-1x12x49x32xf32, -1x12x32x49xf32) + matmul_51 = paddle._C_ops.matmul(scale_8, transpose_55, False, False) + del scale_8, transpose_55 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_128 = paddle._C_ops.reshape(data_17, full_int_array_7) + del data_17 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_8 = paddle._C_ops.index_select(data_18, reshape_128, 0) + del data_18, reshape_128 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_129 = paddle._C_ops.reshape(index_select_8, full_int_array_8) + del index_select_8 + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_56 = paddle._C_ops.transpose(reshape_129, [2, 0, 1]) + del reshape_129 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_24 = paddle._C_ops.unsqueeze(transpose_56, full_int_array_0) + del transpose_56 + + # pd_op.add: (-1x12x49x49xf32) <- (-1x12x49x49xf32, 1x12x49x49xf32) + add_63 = paddle._C_ops.add(matmul_51, unsqueeze_24) + del matmul_51, unsqueeze_24 + + # pd_op.softmax: (-1x12x49x49xf32) <- (-1x12x49x49xf32) + softmax_8 = paddle._C_ops.softmax(add_63, -1) + del add_63 + + # pd_op.matmul: (-1x12x49x32xf32) <- (-1x12x49x49xf32, -1x12x49x32xf32) + matmul_52 = paddle._C_ops.matmul(softmax_8, slice_65, False, False) + del slice_65, softmax_8 + + # pd_op.transpose: (-1x49x12x32xf32) <- (-1x12x49x32xf32) + transpose_57 = paddle._C_ops.transpose(matmul_52, [0, 2, 1, 3]) + del matmul_52 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_59 = [slice_62, full_4, full_17] + del slice_62 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_57 = paddle._C_ops.stack(combine_59, 0) + del combine_59 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x49x12x32xf32, 3xi64) + reshape_130 = paddle._C_ops.reshape(transpose_57, stack_57) + del stack_57, transpose_57 + + # pd_op.matmul: (-1x49x384xf32) <- (-1x49x384xf32, 384x384xf32) + matmul_53 = paddle._C_ops.matmul(reshape_130, parameter_194, False, False) + del parameter_194, reshape_130 + + # pd_op.add: (-1x49x384xf32) <- (-1x49x384xf32, 384xf32) + add_64 = paddle._C_ops.add(matmul_53, parameter_193) + del matmul_53, parameter_193 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x49x384xf32, 4xi64) + reshape_131 = paddle._C_ops.reshape(add_64, full_int_array_38) + del add_64 + + # pd_op.reshape: (-1x2x2x7x7x384xf32) <- (-1x7x7x384xf32, 6xi64) + reshape_132 = paddle._C_ops.reshape(reshape_131, full_int_array_40) + del reshape_131 + + # pd_op.transpose: (-1x2x7x2x7x384xf32) <- (-1x2x2x7x7x384xf32) + transpose_58 = paddle._C_ops.transpose(reshape_132, [0, 1, 3, 2, 4, 5]) + del reshape_132 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x2x7x2x7x384xf32, 4xi64) + reshape_133 = paddle._C_ops.reshape(transpose_58, full_int_array_41) + del transpose_58 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_60 = [slice_60, full_32, full_17] + del slice_60 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_58 = paddle._C_ops.stack(combine_60, 0) + del combine_60 + + # pd_op.reshape: (-1x196x384xf32) <- (-1x14x14x384xf32, 3xi64) + reshape_134 = paddle._C_ops.reshape(reshape_133, stack_58) + del reshape_133, stack_58 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_65 = paddle._C_ops.add(add_61, reshape_134) + del add_61, reshape_134 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_60, layer_norm_61, layer_norm_62 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_65, parameter_192, parameter_191, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_191, parameter_192 + + # pd_op.matmul: (-1x196x1536xf32) <- (-1x196x384xf32, 384x1536xf32) + matmul_54 = paddle._C_ops.matmul(layer_norm_60, parameter_190, False, False) + del layer_norm_60, parameter_190 + + # pd_op.add: (-1x196x1536xf32) <- (-1x196x1536xf32, 1536xf32) + add_66 = paddle._C_ops.add(matmul_54, parameter_189) + del matmul_54, parameter_189 + + # pd_op.gelu: (-1x196x1536xf32) <- (-1x196x1536xf32) + gelu_8 = paddle._C_ops.gelu(add_66, False) + del add_66 + + # pd_op.matmul: (-1x196x384xf32) <- (-1x196x1536xf32, 1536x384xf32) + matmul_55 = paddle._C_ops.matmul(gelu_8, parameter_188, False, False) + del gelu_8, parameter_188 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, 384xf32) + add_67 = paddle._C_ops.add(matmul_55, parameter_187) + del matmul_55, parameter_187 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_68 = paddle._C_ops.add(add_65, add_67) + del add_65, add_67 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_39 = paddle._C_ops.shape64(add_68) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_66 = paddle._C_ops.slice( + shape64_39, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_39 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_63, layer_norm_64, layer_norm_65 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_68, parameter_186, parameter_185, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_185, parameter_186 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_61 = [slice_66, full_29, full_29, full_17] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_59 = paddle._C_ops.stack(combine_61, 0) + del combine_61 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x196x384xf32, 4xi64) + reshape_135 = paddle._C_ops.reshape(layer_norm_63, stack_59) + del layer_norm_63, stack_59 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_40 = paddle._C_ops.shape64(reshape_135) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_67 = paddle._C_ops.slice( + shape64_40, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_40 + + # pd_op.roll: (-1x14x14x384xf32) <- (-1x14x14x384xf32, 2xi64) + roll_8 = paddle._C_ops.roll(reshape_135, full_int_array_11, [1, 2]) + del reshape_135 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_41 = paddle._C_ops.shape64(roll_8) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_68 = paddle._C_ops.slice( + shape64_41, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_41 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_62 = [slice_68, full_30, full_3, full_30, full_3, full_17] + del slice_68 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_60 = paddle._C_ops.stack(combine_62, 0) + del combine_62 + + # pd_op.reshape: (-1x2x7x2x7x384xf32) <- (-1x14x14x384xf32, 6xi64) + reshape_136 = paddle._C_ops.reshape(roll_8, stack_60) + del roll_8, stack_60 + + # pd_op.transpose: (-1x2x2x7x7x384xf32) <- (-1x2x7x2x7x384xf32) + transpose_59 = paddle._C_ops.transpose(reshape_136, [0, 1, 3, 2, 4, 5]) + del reshape_136 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x2x2x7x7x384xf32, 4xi64) + reshape_137 = paddle._C_ops.reshape(transpose_59, full_int_array_38) + del transpose_59 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x7x7x384xf32, 3xi64) + reshape_138 = paddle._C_ops.reshape(reshape_137, full_int_array_39) + del reshape_137 + + # pd_op.full: (1x14x14x1xf32) <- () + full_38 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__44 = paddle._C_ops.set_value_( + full_38, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_38 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__45 = paddle._C_ops.set_value_( + set_value__44, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__44 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__46 = paddle._C_ops.set_value_( + set_value__45, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__45 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__47 = paddle._C_ops.set_value_( + set_value__46, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__46 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__48 = paddle._C_ops.set_value_( + set_value__47, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__47 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__49 = paddle._C_ops.set_value_( + set_value__48, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__48 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__50 = paddle._C_ops.set_value_( + set_value__49, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__49 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__51 = paddle._C_ops.set_value_( + set_value__50, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__50 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__4 = paddle._C_ops.set_value_( + set_value__51, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__51 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_139 = paddle._C_ops.reshape(set_value__4, full_int_array_42) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_60 = paddle._C_ops.transpose(reshape_139, [0, 1, 3, 2, 4, 5]) + del reshape_139 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_140 = paddle._C_ops.reshape(transpose_60, full_int_array_27) + del transpose_60 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_141 = paddle._C_ops.reshape(reshape_140, full_int_array_28) + del reshape_140 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_25 = paddle._C_ops.unsqueeze(reshape_141, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_26 = paddle._C_ops.unsqueeze(reshape_141, full_int_array_5) + del reshape_141 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_4 = paddle._C_ops.subtract(unsqueeze_25, unsqueeze_26) + del unsqueeze_25, unsqueeze_26 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_4 = paddle._C_ops.not_equal(subtract_4, full_10) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_8 = paddle._C_ops.where(not_equal_4, full_34, subtract_4) + del not_equal_4, subtract_4 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_4 = paddle._C_ops.equal(where_8, full_10) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_9 = paddle._C_ops.where(equal_4, full_35, where_8) + del equal_4, where_8 + + # pd_op.shape64: (3xi64) <- (-1x49x384xf32) + shape64_42 = paddle._C_ops.shape64(reshape_138) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_69 = paddle._C_ops.slice( + shape64_42, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_42 + + # pd_op.matmul: (-1x49x1152xf32) <- (-1x49x384xf32, 384x1152xf32) + matmul_56 = paddle._C_ops.matmul(reshape_138, parameter_184, False, False) + del parameter_184, reshape_138 + + # pd_op.add: (-1x49x1152xf32) <- (-1x49x1152xf32, 1152xf32) + add_69 = paddle._C_ops.add(matmul_56, parameter_183) + del matmul_56, parameter_183 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_63 = [slice_69, full_4, full_5, full_31, full_6] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_61 = paddle._C_ops.stack(combine_63, 0) + del combine_63 + + # pd_op.reshape: (-1x49x3x12x32xf32) <- (-1x49x1152xf32, 5xi64) + reshape_142 = paddle._C_ops.reshape(add_69, stack_61) + del add_69, stack_61 + + # pd_op.transpose: (3x-1x12x49x32xf32) <- (-1x49x3x12x32xf32) + transpose_61 = paddle._C_ops.transpose(reshape_142, [2, 0, 3, 1, 4]) + del reshape_142 + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_70 = paddle._C_ops.slice( + transpose_61, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_71 = paddle._C_ops.slice( + transpose_61, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_72 = paddle._C_ops.slice( + transpose_61, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_61 + + # pd_op.scale: (-1x12x49x32xf32) <- (-1x12x49x32xf32, 1xf32) + scale_9 = paddle._C_ops.scale(slice_70, full_7, float("0"), True) + del slice_70 + + # pd_op.transpose: (-1x12x32x49xf32) <- (-1x12x49x32xf32) + transpose_62 = paddle._C_ops.transpose(slice_71, [0, 1, 3, 2]) + del slice_71 + + # pd_op.matmul: (-1x12x49x49xf32) <- (-1x12x49x32xf32, -1x12x32x49xf32) + matmul_57 = paddle._C_ops.matmul(scale_9, transpose_62, False, False) + del scale_9, transpose_62 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_143 = paddle._C_ops.reshape(data_19, full_int_array_7) + del data_19 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_9 = paddle._C_ops.index_select(data_20, reshape_143, 0) + del data_20, reshape_143 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_144 = paddle._C_ops.reshape(index_select_9, full_int_array_8) + del index_select_9 + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_63 = paddle._C_ops.transpose(reshape_144, [2, 0, 1]) + del reshape_144 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_27 = paddle._C_ops.unsqueeze(transpose_63, full_int_array_0) + del transpose_63 + + # pd_op.add: (-1x12x49x49xf32) <- (-1x12x49x49xf32, 1x12x49x49xf32) + add_70 = paddle._C_ops.add(matmul_57, unsqueeze_27) + del matmul_57, unsqueeze_27 + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_4 = paddle._C_ops.floor_divide(slice_69, full_36) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_64 = [floor_divide_4, full_20, full_31, full_4, full_4] + del floor_divide_4 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_62 = paddle._C_ops.stack(combine_64, 0) + del combine_64 + + # pd_op.reshape: (-1x4x12x49x49xf32) <- (-1x12x49x49xf32, 5xi64) + reshape_145 = paddle._C_ops.reshape(add_70, stack_62) + del add_70, stack_62 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_28 = paddle._C_ops.unsqueeze(where_9, full_int_array_1) + del where_9 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_29 = paddle._C_ops.unsqueeze(unsqueeze_28, full_int_array_0) + del unsqueeze_28 + + # pd_op.add: (-1x4x12x49x49xf32) <- (-1x4x12x49x49xf32, 1x4x1x49x49xf32) + add_71 = paddle._C_ops.add(reshape_145, unsqueeze_29) + del reshape_145, unsqueeze_29 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_65 = [slice_69, full_31, full_4, full_4] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_63 = paddle._C_ops.stack(combine_65, 0) + del combine_65 + + # pd_op.reshape: (-1x12x49x49xf32) <- (-1x4x12x49x49xf32, 4xi64) + reshape_146 = paddle._C_ops.reshape(add_71, stack_63) + del add_71, stack_63 + + # pd_op.softmax: (-1x12x49x49xf32) <- (-1x12x49x49xf32) + softmax_9 = paddle._C_ops.softmax(reshape_146, -1) + del reshape_146 + + # pd_op.matmul: (-1x12x49x32xf32) <- (-1x12x49x49xf32, -1x12x49x32xf32) + matmul_58 = paddle._C_ops.matmul(softmax_9, slice_72, False, False) + del slice_72, softmax_9 + + # pd_op.transpose: (-1x49x12x32xf32) <- (-1x12x49x32xf32) + transpose_64 = paddle._C_ops.transpose(matmul_58, [0, 2, 1, 3]) + del matmul_58 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_66 = [slice_69, full_4, full_17] + del slice_69 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_64 = paddle._C_ops.stack(combine_66, 0) + del combine_66 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x49x12x32xf32, 3xi64) + reshape_147 = paddle._C_ops.reshape(transpose_64, stack_64) + del stack_64, transpose_64 + + # pd_op.matmul: (-1x49x384xf32) <- (-1x49x384xf32, 384x384xf32) + matmul_59 = paddle._C_ops.matmul(reshape_147, parameter_182, False, False) + del parameter_182, reshape_147 + + # pd_op.add: (-1x49x384xf32) <- (-1x49x384xf32, 384xf32) + add_72 = paddle._C_ops.add(matmul_59, parameter_181) + del matmul_59, parameter_181 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x49x384xf32, 4xi64) + reshape_148 = paddle._C_ops.reshape(add_72, full_int_array_38) + del add_72 + + # pd_op.reshape: (-1x2x2x7x7x384xf32) <- (-1x7x7x384xf32, 6xi64) + reshape_149 = paddle._C_ops.reshape(reshape_148, full_int_array_40) + del reshape_148 + + # pd_op.transpose: (-1x2x7x2x7x384xf32) <- (-1x2x2x7x7x384xf32) + transpose_65 = paddle._C_ops.transpose(reshape_149, [0, 1, 3, 2, 4, 5]) + del reshape_149 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x2x7x2x7x384xf32, 4xi64) + reshape_150 = paddle._C_ops.reshape(transpose_65, full_int_array_41) + del transpose_65 + + # pd_op.roll: (-1x14x14x384xf32) <- (-1x14x14x384xf32, 2xi64) + roll_9 = paddle._C_ops.roll(reshape_150, full_int_array_29, [1, 2]) + del reshape_150 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_67 = [slice_66, full_32, full_17] + del slice_66 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_65 = paddle._C_ops.stack(combine_67, 0) + del combine_67 + + # pd_op.reshape: (-1x196x384xf32) <- (-1x14x14x384xf32, 3xi64) + reshape_151 = paddle._C_ops.reshape(roll_9, stack_65) + del roll_9, stack_65 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_73 = paddle._C_ops.add(add_68, reshape_151) + del add_68, reshape_151 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_66, layer_norm_67, layer_norm_68 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_73, parameter_180, parameter_179, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_179, parameter_180 + + # pd_op.matmul: (-1x196x1536xf32) <- (-1x196x384xf32, 384x1536xf32) + matmul_60 = paddle._C_ops.matmul(layer_norm_66, parameter_178, False, False) + del layer_norm_66, parameter_178 + + # pd_op.add: (-1x196x1536xf32) <- (-1x196x1536xf32, 1536xf32) + add_74 = paddle._C_ops.add(matmul_60, parameter_177) + del matmul_60, parameter_177 + + # pd_op.gelu: (-1x196x1536xf32) <- (-1x196x1536xf32) + gelu_9 = paddle._C_ops.gelu(add_74, False) + del add_74 + + # pd_op.matmul: (-1x196x384xf32) <- (-1x196x1536xf32, 1536x384xf32) + matmul_61 = paddle._C_ops.matmul(gelu_9, parameter_176, False, False) + del gelu_9, parameter_176 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, 384xf32) + add_75 = paddle._C_ops.add(matmul_61, parameter_175) + del matmul_61, parameter_175 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_76 = paddle._C_ops.add(add_73, add_75) + del add_73, add_75 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_43 = paddle._C_ops.shape64(add_76) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_73 = paddle._C_ops.slice( + shape64_43, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_43 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_69, layer_norm_70, layer_norm_71 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_76, parameter_174, parameter_173, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_173, parameter_174 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_68 = [slice_73, full_29, full_29, full_17] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_66 = paddle._C_ops.stack(combine_68, 0) + del combine_68 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x196x384xf32, 4xi64) + reshape_152 = paddle._C_ops.reshape(layer_norm_69, stack_66) + del layer_norm_69, stack_66 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_44 = paddle._C_ops.shape64(reshape_152) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_74 = paddle._C_ops.slice( + shape64_44, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_44 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_69 = [slice_74, full_30, full_3, full_30, full_3, full_17] + del slice_74 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_67 = paddle._C_ops.stack(combine_69, 0) + del combine_69 + + # pd_op.reshape: (-1x2x7x2x7x384xf32) <- (-1x14x14x384xf32, 6xi64) + reshape_153 = paddle._C_ops.reshape(reshape_152, stack_67) + del reshape_152, stack_67 + + # pd_op.transpose: (-1x2x2x7x7x384xf32) <- (-1x2x7x2x7x384xf32) + transpose_66 = paddle._C_ops.transpose(reshape_153, [0, 1, 3, 2, 4, 5]) + del reshape_153 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x2x2x7x7x384xf32, 4xi64) + reshape_154 = paddle._C_ops.reshape(transpose_66, full_int_array_38) + del transpose_66 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x7x7x384xf32, 3xi64) + reshape_155 = paddle._C_ops.reshape(reshape_154, full_int_array_39) + del reshape_154 + + # pd_op.shape64: (3xi64) <- (-1x49x384xf32) + shape64_45 = paddle._C_ops.shape64(reshape_155) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_75 = paddle._C_ops.slice( + shape64_45, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_45 + + # pd_op.matmul: (-1x49x1152xf32) <- (-1x49x384xf32, 384x1152xf32) + matmul_62 = paddle._C_ops.matmul(reshape_155, parameter_172, False, False) + del parameter_172, reshape_155 + + # pd_op.add: (-1x49x1152xf32) <- (-1x49x1152xf32, 1152xf32) + add_77 = paddle._C_ops.add(matmul_62, parameter_171) + del matmul_62, parameter_171 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_70 = [slice_75, full_4, full_5, full_31, full_6] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_68 = paddle._C_ops.stack(combine_70, 0) + del combine_70 + + # pd_op.reshape: (-1x49x3x12x32xf32) <- (-1x49x1152xf32, 5xi64) + reshape_156 = paddle._C_ops.reshape(add_77, stack_68) + del add_77, stack_68 + + # pd_op.transpose: (3x-1x12x49x32xf32) <- (-1x49x3x12x32xf32) + transpose_67 = paddle._C_ops.transpose(reshape_156, [2, 0, 3, 1, 4]) + del reshape_156 + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_76 = paddle._C_ops.slice( + transpose_67, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_77 = paddle._C_ops.slice( + transpose_67, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_78 = paddle._C_ops.slice( + transpose_67, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_67 + + # pd_op.scale: (-1x12x49x32xf32) <- (-1x12x49x32xf32, 1xf32) + scale_10 = paddle._C_ops.scale(slice_76, full_7, float("0"), True) + del slice_76 + + # pd_op.transpose: (-1x12x32x49xf32) <- (-1x12x49x32xf32) + transpose_68 = paddle._C_ops.transpose(slice_77, [0, 1, 3, 2]) + del slice_77 + + # pd_op.matmul: (-1x12x49x49xf32) <- (-1x12x49x32xf32, -1x12x32x49xf32) + matmul_63 = paddle._C_ops.matmul(scale_10, transpose_68, False, False) + del scale_10, transpose_68 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_157 = paddle._C_ops.reshape(data_21, full_int_array_7) + del data_21 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_10 = paddle._C_ops.index_select(data_22, reshape_157, 0) + del data_22, reshape_157 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_158 = paddle._C_ops.reshape(index_select_10, full_int_array_8) + del index_select_10 + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_69 = paddle._C_ops.transpose(reshape_158, [2, 0, 1]) + del reshape_158 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_30 = paddle._C_ops.unsqueeze(transpose_69, full_int_array_0) + del transpose_69 + + # pd_op.add: (-1x12x49x49xf32) <- (-1x12x49x49xf32, 1x12x49x49xf32) + add_78 = paddle._C_ops.add(matmul_63, unsqueeze_30) + del matmul_63, unsqueeze_30 + + # pd_op.softmax: (-1x12x49x49xf32) <- (-1x12x49x49xf32) + softmax_10 = paddle._C_ops.softmax(add_78, -1) + del add_78 + + # pd_op.matmul: (-1x12x49x32xf32) <- (-1x12x49x49xf32, -1x12x49x32xf32) + matmul_64 = paddle._C_ops.matmul(softmax_10, slice_78, False, False) + del slice_78, softmax_10 + + # pd_op.transpose: (-1x49x12x32xf32) <- (-1x12x49x32xf32) + transpose_70 = paddle._C_ops.transpose(matmul_64, [0, 2, 1, 3]) + del matmul_64 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_71 = [slice_75, full_4, full_17] + del slice_75 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_69 = paddle._C_ops.stack(combine_71, 0) + del combine_71 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x49x12x32xf32, 3xi64) + reshape_159 = paddle._C_ops.reshape(transpose_70, stack_69) + del stack_69, transpose_70 + + # pd_op.matmul: (-1x49x384xf32) <- (-1x49x384xf32, 384x384xf32) + matmul_65 = paddle._C_ops.matmul(reshape_159, parameter_170, False, False) + del parameter_170, reshape_159 + + # pd_op.add: (-1x49x384xf32) <- (-1x49x384xf32, 384xf32) + add_79 = paddle._C_ops.add(matmul_65, parameter_169) + del matmul_65, parameter_169 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x49x384xf32, 4xi64) + reshape_160 = paddle._C_ops.reshape(add_79, full_int_array_38) + del add_79 + + # pd_op.reshape: (-1x2x2x7x7x384xf32) <- (-1x7x7x384xf32, 6xi64) + reshape_161 = paddle._C_ops.reshape(reshape_160, full_int_array_40) + del reshape_160 + + # pd_op.transpose: (-1x2x7x2x7x384xf32) <- (-1x2x2x7x7x384xf32) + transpose_71 = paddle._C_ops.transpose(reshape_161, [0, 1, 3, 2, 4, 5]) + del reshape_161 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x2x7x2x7x384xf32, 4xi64) + reshape_162 = paddle._C_ops.reshape(transpose_71, full_int_array_41) + del transpose_71 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_72 = [slice_73, full_32, full_17] + del slice_73 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_70 = paddle._C_ops.stack(combine_72, 0) + del combine_72 + + # pd_op.reshape: (-1x196x384xf32) <- (-1x14x14x384xf32, 3xi64) + reshape_163 = paddle._C_ops.reshape(reshape_162, stack_70) + del reshape_162, stack_70 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_80 = paddle._C_ops.add(add_76, reshape_163) + del add_76, reshape_163 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_72, layer_norm_73, layer_norm_74 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_80, parameter_168, parameter_167, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_167, parameter_168 + + # pd_op.matmul: (-1x196x1536xf32) <- (-1x196x384xf32, 384x1536xf32) + matmul_66 = paddle._C_ops.matmul(layer_norm_72, parameter_166, False, False) + del layer_norm_72, parameter_166 + + # pd_op.add: (-1x196x1536xf32) <- (-1x196x1536xf32, 1536xf32) + add_81 = paddle._C_ops.add(matmul_66, parameter_165) + del matmul_66, parameter_165 + + # pd_op.gelu: (-1x196x1536xf32) <- (-1x196x1536xf32) + gelu_10 = paddle._C_ops.gelu(add_81, False) + del add_81 + + # pd_op.matmul: (-1x196x384xf32) <- (-1x196x1536xf32, 1536x384xf32) + matmul_67 = paddle._C_ops.matmul(gelu_10, parameter_164, False, False) + del gelu_10, parameter_164 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, 384xf32) + add_82 = paddle._C_ops.add(matmul_67, parameter_163) + del matmul_67, parameter_163 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_83 = paddle._C_ops.add(add_80, add_82) + del add_80, add_82 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_46 = paddle._C_ops.shape64(add_83) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_79 = paddle._C_ops.slice( + shape64_46, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_46 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_75, layer_norm_76, layer_norm_77 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_83, parameter_162, parameter_161, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_161, parameter_162 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_73 = [slice_79, full_29, full_29, full_17] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_71 = paddle._C_ops.stack(combine_73, 0) + del combine_73 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x196x384xf32, 4xi64) + reshape_164 = paddle._C_ops.reshape(layer_norm_75, stack_71) + del layer_norm_75, stack_71 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_47 = paddle._C_ops.shape64(reshape_164) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_80 = paddle._C_ops.slice( + shape64_47, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_47 + + # pd_op.roll: (-1x14x14x384xf32) <- (-1x14x14x384xf32, 2xi64) + roll_10 = paddle._C_ops.roll(reshape_164, full_int_array_11, [1, 2]) + del reshape_164 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_48 = paddle._C_ops.shape64(roll_10) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_81 = paddle._C_ops.slice( + shape64_48, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_48 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_74 = [slice_81, full_30, full_3, full_30, full_3, full_17] + del slice_81 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_72 = paddle._C_ops.stack(combine_74, 0) + del combine_74 + + # pd_op.reshape: (-1x2x7x2x7x384xf32) <- (-1x14x14x384xf32, 6xi64) + reshape_165 = paddle._C_ops.reshape(roll_10, stack_72) + del roll_10, stack_72 + + # pd_op.transpose: (-1x2x2x7x7x384xf32) <- (-1x2x7x2x7x384xf32) + transpose_72 = paddle._C_ops.transpose(reshape_165, [0, 1, 3, 2, 4, 5]) + del reshape_165 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x2x2x7x7x384xf32, 4xi64) + reshape_166 = paddle._C_ops.reshape(transpose_72, full_int_array_38) + del transpose_72 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x7x7x384xf32, 3xi64) + reshape_167 = paddle._C_ops.reshape(reshape_166, full_int_array_39) + del reshape_166 + + # pd_op.full: (1x14x14x1xf32) <- () + full_39 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__52 = paddle._C_ops.set_value_( + full_39, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_39 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__53 = paddle._C_ops.set_value_( + set_value__52, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__52 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__54 = paddle._C_ops.set_value_( + set_value__53, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__53 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__55 = paddle._C_ops.set_value_( + set_value__54, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__54 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__56 = paddle._C_ops.set_value_( + set_value__55, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__55 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__57 = paddle._C_ops.set_value_( + set_value__56, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__56 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__58 = paddle._C_ops.set_value_( + set_value__57, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__57 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__59 = paddle._C_ops.set_value_( + set_value__58, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__58 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__5 = paddle._C_ops.set_value_( + set_value__59, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__59 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_168 = paddle._C_ops.reshape(set_value__5, full_int_array_42) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_73 = paddle._C_ops.transpose(reshape_168, [0, 1, 3, 2, 4, 5]) + del reshape_168 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_169 = paddle._C_ops.reshape(transpose_73, full_int_array_27) + del transpose_73 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_170 = paddle._C_ops.reshape(reshape_169, full_int_array_28) + del reshape_169 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_31 = paddle._C_ops.unsqueeze(reshape_170, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_32 = paddle._C_ops.unsqueeze(reshape_170, full_int_array_5) + del reshape_170 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_5 = paddle._C_ops.subtract(unsqueeze_31, unsqueeze_32) + del unsqueeze_31, unsqueeze_32 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_5 = paddle._C_ops.not_equal(subtract_5, full_10) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_10 = paddle._C_ops.where(not_equal_5, full_34, subtract_5) + del not_equal_5, subtract_5 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_5 = paddle._C_ops.equal(where_10, full_10) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_11 = paddle._C_ops.where(equal_5, full_35, where_10) + del equal_5, where_10 + + # pd_op.shape64: (3xi64) <- (-1x49x384xf32) + shape64_49 = paddle._C_ops.shape64(reshape_167) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_82 = paddle._C_ops.slice( + shape64_49, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_49 + + # pd_op.matmul: (-1x49x1152xf32) <- (-1x49x384xf32, 384x1152xf32) + matmul_68 = paddle._C_ops.matmul(reshape_167, parameter_160, False, False) + del parameter_160, reshape_167 + + # pd_op.add: (-1x49x1152xf32) <- (-1x49x1152xf32, 1152xf32) + add_84 = paddle._C_ops.add(matmul_68, parameter_159) + del matmul_68, parameter_159 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_75 = [slice_82, full_4, full_5, full_31, full_6] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_73 = paddle._C_ops.stack(combine_75, 0) + del combine_75 + + # pd_op.reshape: (-1x49x3x12x32xf32) <- (-1x49x1152xf32, 5xi64) + reshape_171 = paddle._C_ops.reshape(add_84, stack_73) + del add_84, stack_73 + + # pd_op.transpose: (3x-1x12x49x32xf32) <- (-1x49x3x12x32xf32) + transpose_74 = paddle._C_ops.transpose(reshape_171, [2, 0, 3, 1, 4]) + del reshape_171 + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_83 = paddle._C_ops.slice( + transpose_74, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_84 = paddle._C_ops.slice( + transpose_74, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_85 = paddle._C_ops.slice( + transpose_74, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_74 + + # pd_op.scale: (-1x12x49x32xf32) <- (-1x12x49x32xf32, 1xf32) + scale_11 = paddle._C_ops.scale(slice_83, full_7, float("0"), True) + del slice_83 + + # pd_op.transpose: (-1x12x32x49xf32) <- (-1x12x49x32xf32) + transpose_75 = paddle._C_ops.transpose(slice_84, [0, 1, 3, 2]) + del slice_84 + + # pd_op.matmul: (-1x12x49x49xf32) <- (-1x12x49x32xf32, -1x12x32x49xf32) + matmul_69 = paddle._C_ops.matmul(scale_11, transpose_75, False, False) + del scale_11, transpose_75 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_172 = paddle._C_ops.reshape(data_23, full_int_array_7) + del data_23 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_11 = paddle._C_ops.index_select(data_24, reshape_172, 0) + del data_24, reshape_172 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_173 = paddle._C_ops.reshape(index_select_11, full_int_array_8) + del index_select_11 + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_76 = paddle._C_ops.transpose(reshape_173, [2, 0, 1]) + del reshape_173 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_33 = paddle._C_ops.unsqueeze(transpose_76, full_int_array_0) + del transpose_76 + + # pd_op.add: (-1x12x49x49xf32) <- (-1x12x49x49xf32, 1x12x49x49xf32) + add_85 = paddle._C_ops.add(matmul_69, unsqueeze_33) + del matmul_69, unsqueeze_33 + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_5 = paddle._C_ops.floor_divide(slice_82, full_36) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_76 = [floor_divide_5, full_20, full_31, full_4, full_4] + del floor_divide_5 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_74 = paddle._C_ops.stack(combine_76, 0) + del combine_76 + + # pd_op.reshape: (-1x4x12x49x49xf32) <- (-1x12x49x49xf32, 5xi64) + reshape_174 = paddle._C_ops.reshape(add_85, stack_74) + del add_85, stack_74 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_34 = paddle._C_ops.unsqueeze(where_11, full_int_array_1) + del where_11 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_35 = paddle._C_ops.unsqueeze(unsqueeze_34, full_int_array_0) + del unsqueeze_34 + + # pd_op.add: (-1x4x12x49x49xf32) <- (-1x4x12x49x49xf32, 1x4x1x49x49xf32) + add_86 = paddle._C_ops.add(reshape_174, unsqueeze_35) + del reshape_174, unsqueeze_35 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_77 = [slice_82, full_31, full_4, full_4] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_75 = paddle._C_ops.stack(combine_77, 0) + del combine_77 + + # pd_op.reshape: (-1x12x49x49xf32) <- (-1x4x12x49x49xf32, 4xi64) + reshape_175 = paddle._C_ops.reshape(add_86, stack_75) + del add_86, stack_75 + + # pd_op.softmax: (-1x12x49x49xf32) <- (-1x12x49x49xf32) + softmax_11 = paddle._C_ops.softmax(reshape_175, -1) + del reshape_175 + + # pd_op.matmul: (-1x12x49x32xf32) <- (-1x12x49x49xf32, -1x12x49x32xf32) + matmul_70 = paddle._C_ops.matmul(softmax_11, slice_85, False, False) + del slice_85, softmax_11 + + # pd_op.transpose: (-1x49x12x32xf32) <- (-1x12x49x32xf32) + transpose_77 = paddle._C_ops.transpose(matmul_70, [0, 2, 1, 3]) + del matmul_70 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_78 = [slice_82, full_4, full_17] + del slice_82 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_76 = paddle._C_ops.stack(combine_78, 0) + del combine_78 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x49x12x32xf32, 3xi64) + reshape_176 = paddle._C_ops.reshape(transpose_77, stack_76) + del stack_76, transpose_77 + + # pd_op.matmul: (-1x49x384xf32) <- (-1x49x384xf32, 384x384xf32) + matmul_71 = paddle._C_ops.matmul(reshape_176, parameter_158, False, False) + del parameter_158, reshape_176 + + # pd_op.add: (-1x49x384xf32) <- (-1x49x384xf32, 384xf32) + add_87 = paddle._C_ops.add(matmul_71, parameter_157) + del matmul_71, parameter_157 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x49x384xf32, 4xi64) + reshape_177 = paddle._C_ops.reshape(add_87, full_int_array_38) + del add_87 + + # pd_op.reshape: (-1x2x2x7x7x384xf32) <- (-1x7x7x384xf32, 6xi64) + reshape_178 = paddle._C_ops.reshape(reshape_177, full_int_array_40) + del reshape_177 + + # pd_op.transpose: (-1x2x7x2x7x384xf32) <- (-1x2x2x7x7x384xf32) + transpose_78 = paddle._C_ops.transpose(reshape_178, [0, 1, 3, 2, 4, 5]) + del reshape_178 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x2x7x2x7x384xf32, 4xi64) + reshape_179 = paddle._C_ops.reshape(transpose_78, full_int_array_41) + del transpose_78 + + # pd_op.roll: (-1x14x14x384xf32) <- (-1x14x14x384xf32, 2xi64) + roll_11 = paddle._C_ops.roll(reshape_179, full_int_array_29, [1, 2]) + del reshape_179 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_79 = [slice_79, full_32, full_17] + del slice_79 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_77 = paddle._C_ops.stack(combine_79, 0) + del combine_79 + + # pd_op.reshape: (-1x196x384xf32) <- (-1x14x14x384xf32, 3xi64) + reshape_180 = paddle._C_ops.reshape(roll_11, stack_77) + del roll_11, stack_77 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_88 = paddle._C_ops.add(add_83, reshape_180) + del add_83, reshape_180 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_78, layer_norm_79, layer_norm_80 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_88, parameter_156, parameter_155, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_155, parameter_156 + + # pd_op.matmul: (-1x196x1536xf32) <- (-1x196x384xf32, 384x1536xf32) + matmul_72 = paddle._C_ops.matmul(layer_norm_78, parameter_154, False, False) + del layer_norm_78, parameter_154 + + # pd_op.add: (-1x196x1536xf32) <- (-1x196x1536xf32, 1536xf32) + add_89 = paddle._C_ops.add(matmul_72, parameter_153) + del matmul_72, parameter_153 + + # pd_op.gelu: (-1x196x1536xf32) <- (-1x196x1536xf32) + gelu_11 = paddle._C_ops.gelu(add_89, False) + del add_89 + + # pd_op.matmul: (-1x196x384xf32) <- (-1x196x1536xf32, 1536x384xf32) + matmul_73 = paddle._C_ops.matmul(gelu_11, parameter_152, False, False) + del gelu_11, parameter_152 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, 384xf32) + add_90 = paddle._C_ops.add(matmul_73, parameter_151) + del matmul_73, parameter_151 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_91 = paddle._C_ops.add(add_88, add_90) + del add_88, add_90 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_50 = paddle._C_ops.shape64(add_91) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_86 = paddle._C_ops.slice( + shape64_50, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_50 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_81, layer_norm_82, layer_norm_83 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_91, parameter_150, parameter_149, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_149, parameter_150 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_80 = [slice_86, full_29, full_29, full_17] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_78 = paddle._C_ops.stack(combine_80, 0) + del combine_80 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x196x384xf32, 4xi64) + reshape_181 = paddle._C_ops.reshape(layer_norm_81, stack_78) + del layer_norm_81, stack_78 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_51 = paddle._C_ops.shape64(reshape_181) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_87 = paddle._C_ops.slice( + shape64_51, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_51 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_81 = [slice_87, full_30, full_3, full_30, full_3, full_17] + del slice_87 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_79 = paddle._C_ops.stack(combine_81, 0) + del combine_81 + + # pd_op.reshape: (-1x2x7x2x7x384xf32) <- (-1x14x14x384xf32, 6xi64) + reshape_182 = paddle._C_ops.reshape(reshape_181, stack_79) + del reshape_181, stack_79 + + # pd_op.transpose: (-1x2x2x7x7x384xf32) <- (-1x2x7x2x7x384xf32) + transpose_79 = paddle._C_ops.transpose(reshape_182, [0, 1, 3, 2, 4, 5]) + del reshape_182 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x2x2x7x7x384xf32, 4xi64) + reshape_183 = paddle._C_ops.reshape(transpose_79, full_int_array_38) + del transpose_79 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x7x7x384xf32, 3xi64) + reshape_184 = paddle._C_ops.reshape(reshape_183, full_int_array_39) + del reshape_183 + + # pd_op.shape64: (3xi64) <- (-1x49x384xf32) + shape64_52 = paddle._C_ops.shape64(reshape_184) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_88 = paddle._C_ops.slice( + shape64_52, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_52 + + # pd_op.matmul: (-1x49x1152xf32) <- (-1x49x384xf32, 384x1152xf32) + matmul_74 = paddle._C_ops.matmul(reshape_184, parameter_148, False, False) + del parameter_148, reshape_184 + + # pd_op.add: (-1x49x1152xf32) <- (-1x49x1152xf32, 1152xf32) + add_92 = paddle._C_ops.add(matmul_74, parameter_147) + del matmul_74, parameter_147 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_82 = [slice_88, full_4, full_5, full_31, full_6] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_80 = paddle._C_ops.stack(combine_82, 0) + del combine_82 + + # pd_op.reshape: (-1x49x3x12x32xf32) <- (-1x49x1152xf32, 5xi64) + reshape_185 = paddle._C_ops.reshape(add_92, stack_80) + del add_92, stack_80 + + # pd_op.transpose: (3x-1x12x49x32xf32) <- (-1x49x3x12x32xf32) + transpose_80 = paddle._C_ops.transpose(reshape_185, [2, 0, 3, 1, 4]) + del reshape_185 + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_89 = paddle._C_ops.slice( + transpose_80, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_90 = paddle._C_ops.slice( + transpose_80, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_91 = paddle._C_ops.slice( + transpose_80, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_80 + + # pd_op.scale: (-1x12x49x32xf32) <- (-1x12x49x32xf32, 1xf32) + scale_12 = paddle._C_ops.scale(slice_89, full_7, float("0"), True) + del slice_89 + + # pd_op.transpose: (-1x12x32x49xf32) <- (-1x12x49x32xf32) + transpose_81 = paddle._C_ops.transpose(slice_90, [0, 1, 3, 2]) + del slice_90 + + # pd_op.matmul: (-1x12x49x49xf32) <- (-1x12x49x32xf32, -1x12x32x49xf32) + matmul_75 = paddle._C_ops.matmul(scale_12, transpose_81, False, False) + del scale_12, transpose_81 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_186 = paddle._C_ops.reshape(data_25, full_int_array_7) + del data_25 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_12 = paddle._C_ops.index_select(data_26, reshape_186, 0) + del data_26, reshape_186 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_187 = paddle._C_ops.reshape(index_select_12, full_int_array_8) + del index_select_12 + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_82 = paddle._C_ops.transpose(reshape_187, [2, 0, 1]) + del reshape_187 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_36 = paddle._C_ops.unsqueeze(transpose_82, full_int_array_0) + del transpose_82 + + # pd_op.add: (-1x12x49x49xf32) <- (-1x12x49x49xf32, 1x12x49x49xf32) + add_93 = paddle._C_ops.add(matmul_75, unsqueeze_36) + del matmul_75, unsqueeze_36 + + # pd_op.softmax: (-1x12x49x49xf32) <- (-1x12x49x49xf32) + softmax_12 = paddle._C_ops.softmax(add_93, -1) + del add_93 + + # pd_op.matmul: (-1x12x49x32xf32) <- (-1x12x49x49xf32, -1x12x49x32xf32) + matmul_76 = paddle._C_ops.matmul(softmax_12, slice_91, False, False) + del slice_91, softmax_12 + + # pd_op.transpose: (-1x49x12x32xf32) <- (-1x12x49x32xf32) + transpose_83 = paddle._C_ops.transpose(matmul_76, [0, 2, 1, 3]) + del matmul_76 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_83 = [slice_88, full_4, full_17] + del slice_88 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_81 = paddle._C_ops.stack(combine_83, 0) + del combine_83 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x49x12x32xf32, 3xi64) + reshape_188 = paddle._C_ops.reshape(transpose_83, stack_81) + del stack_81, transpose_83 + + # pd_op.matmul: (-1x49x384xf32) <- (-1x49x384xf32, 384x384xf32) + matmul_77 = paddle._C_ops.matmul(reshape_188, parameter_146, False, False) + del parameter_146, reshape_188 + + # pd_op.add: (-1x49x384xf32) <- (-1x49x384xf32, 384xf32) + add_94 = paddle._C_ops.add(matmul_77, parameter_145) + del matmul_77, parameter_145 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x49x384xf32, 4xi64) + reshape_189 = paddle._C_ops.reshape(add_94, full_int_array_38) + del add_94 + + # pd_op.reshape: (-1x2x2x7x7x384xf32) <- (-1x7x7x384xf32, 6xi64) + reshape_190 = paddle._C_ops.reshape(reshape_189, full_int_array_40) + del reshape_189 + + # pd_op.transpose: (-1x2x7x2x7x384xf32) <- (-1x2x2x7x7x384xf32) + transpose_84 = paddle._C_ops.transpose(reshape_190, [0, 1, 3, 2, 4, 5]) + del reshape_190 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x2x7x2x7x384xf32, 4xi64) + reshape_191 = paddle._C_ops.reshape(transpose_84, full_int_array_41) + del transpose_84 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_84 = [slice_86, full_32, full_17] + del slice_86 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_82 = paddle._C_ops.stack(combine_84, 0) + del combine_84 + + # pd_op.reshape: (-1x196x384xf32) <- (-1x14x14x384xf32, 3xi64) + reshape_192 = paddle._C_ops.reshape(reshape_191, stack_82) + del reshape_191, stack_82 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_95 = paddle._C_ops.add(add_91, reshape_192) + del add_91, reshape_192 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_84, layer_norm_85, layer_norm_86 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_95, parameter_144, parameter_143, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_143, parameter_144 + + # pd_op.matmul: (-1x196x1536xf32) <- (-1x196x384xf32, 384x1536xf32) + matmul_78 = paddle._C_ops.matmul(layer_norm_84, parameter_142, False, False) + del layer_norm_84, parameter_142 + + # pd_op.add: (-1x196x1536xf32) <- (-1x196x1536xf32, 1536xf32) + add_96 = paddle._C_ops.add(matmul_78, parameter_141) + del matmul_78, parameter_141 + + # pd_op.gelu: (-1x196x1536xf32) <- (-1x196x1536xf32) + gelu_12 = paddle._C_ops.gelu(add_96, False) + del add_96 + + # pd_op.matmul: (-1x196x384xf32) <- (-1x196x1536xf32, 1536x384xf32) + matmul_79 = paddle._C_ops.matmul(gelu_12, parameter_140, False, False) + del gelu_12, parameter_140 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, 384xf32) + add_97 = paddle._C_ops.add(matmul_79, parameter_139) + del matmul_79, parameter_139 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_98 = paddle._C_ops.add(add_95, add_97) + del add_95, add_97 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_53 = paddle._C_ops.shape64(add_98) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_92 = paddle._C_ops.slice( + shape64_53, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_53 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_87, layer_norm_88, layer_norm_89 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_98, parameter_138, parameter_137, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_137, parameter_138 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_85 = [slice_92, full_29, full_29, full_17] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_83 = paddle._C_ops.stack(combine_85, 0) + del combine_85 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x196x384xf32, 4xi64) + reshape_193 = paddle._C_ops.reshape(layer_norm_87, stack_83) + del layer_norm_87, stack_83 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_54 = paddle._C_ops.shape64(reshape_193) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_93 = paddle._C_ops.slice( + shape64_54, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_54 + + # pd_op.roll: (-1x14x14x384xf32) <- (-1x14x14x384xf32, 2xi64) + roll_12 = paddle._C_ops.roll(reshape_193, full_int_array_11, [1, 2]) + del reshape_193 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_55 = paddle._C_ops.shape64(roll_12) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_94 = paddle._C_ops.slice( + shape64_55, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_55 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_86 = [slice_94, full_30, full_3, full_30, full_3, full_17] + del slice_94 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_84 = paddle._C_ops.stack(combine_86, 0) + del combine_86 + + # pd_op.reshape: (-1x2x7x2x7x384xf32) <- (-1x14x14x384xf32, 6xi64) + reshape_194 = paddle._C_ops.reshape(roll_12, stack_84) + del roll_12, stack_84 + + # pd_op.transpose: (-1x2x2x7x7x384xf32) <- (-1x2x7x2x7x384xf32) + transpose_85 = paddle._C_ops.transpose(reshape_194, [0, 1, 3, 2, 4, 5]) + del reshape_194 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x2x2x7x7x384xf32, 4xi64) + reshape_195 = paddle._C_ops.reshape(transpose_85, full_int_array_38) + del transpose_85 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x7x7x384xf32, 3xi64) + reshape_196 = paddle._C_ops.reshape(reshape_195, full_int_array_39) + del reshape_195 + + # pd_op.full: (1x14x14x1xf32) <- () + full_40 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__60 = paddle._C_ops.set_value_( + full_40, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_40 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__61 = paddle._C_ops.set_value_( + set_value__60, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__60 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__62 = paddle._C_ops.set_value_( + set_value__61, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__61 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__63 = paddle._C_ops.set_value_( + set_value__62, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__62 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__64 = paddle._C_ops.set_value_( + set_value__63, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__63 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__65 = paddle._C_ops.set_value_( + set_value__64, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__64 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__66 = paddle._C_ops.set_value_( + set_value__65, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__65 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__67 = paddle._C_ops.set_value_( + set_value__66, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__66 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__6 = paddle._C_ops.set_value_( + set_value__67, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__67 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_197 = paddle._C_ops.reshape(set_value__6, full_int_array_42) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_86 = paddle._C_ops.transpose(reshape_197, [0, 1, 3, 2, 4, 5]) + del reshape_197 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_198 = paddle._C_ops.reshape(transpose_86, full_int_array_27) + del transpose_86 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_199 = paddle._C_ops.reshape(reshape_198, full_int_array_28) + del reshape_198 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_37 = paddle._C_ops.unsqueeze(reshape_199, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_38 = paddle._C_ops.unsqueeze(reshape_199, full_int_array_5) + del reshape_199 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_6 = paddle._C_ops.subtract(unsqueeze_37, unsqueeze_38) + del unsqueeze_37, unsqueeze_38 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_6 = paddle._C_ops.not_equal(subtract_6, full_10) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_12 = paddle._C_ops.where(not_equal_6, full_34, subtract_6) + del not_equal_6, subtract_6 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_6 = paddle._C_ops.equal(where_12, full_10) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_13 = paddle._C_ops.where(equal_6, full_35, where_12) + del equal_6, where_12 + + # pd_op.shape64: (3xi64) <- (-1x49x384xf32) + shape64_56 = paddle._C_ops.shape64(reshape_196) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_95 = paddle._C_ops.slice( + shape64_56, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_56 + + # pd_op.matmul: (-1x49x1152xf32) <- (-1x49x384xf32, 384x1152xf32) + matmul_80 = paddle._C_ops.matmul(reshape_196, parameter_136, False, False) + del parameter_136, reshape_196 + + # pd_op.add: (-1x49x1152xf32) <- (-1x49x1152xf32, 1152xf32) + add_99 = paddle._C_ops.add(matmul_80, parameter_135) + del matmul_80, parameter_135 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_87 = [slice_95, full_4, full_5, full_31, full_6] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_85 = paddle._C_ops.stack(combine_87, 0) + del combine_87 + + # pd_op.reshape: (-1x49x3x12x32xf32) <- (-1x49x1152xf32, 5xi64) + reshape_200 = paddle._C_ops.reshape(add_99, stack_85) + del add_99, stack_85 + + # pd_op.transpose: (3x-1x12x49x32xf32) <- (-1x49x3x12x32xf32) + transpose_87 = paddle._C_ops.transpose(reshape_200, [2, 0, 3, 1, 4]) + del reshape_200 + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_96 = paddle._C_ops.slice( + transpose_87, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_97 = paddle._C_ops.slice( + transpose_87, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_98 = paddle._C_ops.slice( + transpose_87, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_87 + + # pd_op.scale: (-1x12x49x32xf32) <- (-1x12x49x32xf32, 1xf32) + scale_13 = paddle._C_ops.scale(slice_96, full_7, float("0"), True) + del slice_96 + + # pd_op.transpose: (-1x12x32x49xf32) <- (-1x12x49x32xf32) + transpose_88 = paddle._C_ops.transpose(slice_97, [0, 1, 3, 2]) + del slice_97 + + # pd_op.matmul: (-1x12x49x49xf32) <- (-1x12x49x32xf32, -1x12x32x49xf32) + matmul_81 = paddle._C_ops.matmul(scale_13, transpose_88, False, False) + del scale_13, transpose_88 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_201 = paddle._C_ops.reshape(data_27, full_int_array_7) + del data_27 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_13 = paddle._C_ops.index_select(data_28, reshape_201, 0) + del data_28, reshape_201 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_202 = paddle._C_ops.reshape(index_select_13, full_int_array_8) + del index_select_13 + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_89 = paddle._C_ops.transpose(reshape_202, [2, 0, 1]) + del reshape_202 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_39 = paddle._C_ops.unsqueeze(transpose_89, full_int_array_0) + del transpose_89 + + # pd_op.add: (-1x12x49x49xf32) <- (-1x12x49x49xf32, 1x12x49x49xf32) + add_100 = paddle._C_ops.add(matmul_81, unsqueeze_39) + del matmul_81, unsqueeze_39 + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_6 = paddle._C_ops.floor_divide(slice_95, full_36) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_88 = [floor_divide_6, full_20, full_31, full_4, full_4] + del floor_divide_6 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_86 = paddle._C_ops.stack(combine_88, 0) + del combine_88 + + # pd_op.reshape: (-1x4x12x49x49xf32) <- (-1x12x49x49xf32, 5xi64) + reshape_203 = paddle._C_ops.reshape(add_100, stack_86) + del add_100, stack_86 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_40 = paddle._C_ops.unsqueeze(where_13, full_int_array_1) + del where_13 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_41 = paddle._C_ops.unsqueeze(unsqueeze_40, full_int_array_0) + del unsqueeze_40 + + # pd_op.add: (-1x4x12x49x49xf32) <- (-1x4x12x49x49xf32, 1x4x1x49x49xf32) + add_101 = paddle._C_ops.add(reshape_203, unsqueeze_41) + del reshape_203, unsqueeze_41 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_89 = [slice_95, full_31, full_4, full_4] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_87 = paddle._C_ops.stack(combine_89, 0) + del combine_89 + + # pd_op.reshape: (-1x12x49x49xf32) <- (-1x4x12x49x49xf32, 4xi64) + reshape_204 = paddle._C_ops.reshape(add_101, stack_87) + del add_101, stack_87 + + # pd_op.softmax: (-1x12x49x49xf32) <- (-1x12x49x49xf32) + softmax_13 = paddle._C_ops.softmax(reshape_204, -1) + del reshape_204 + + # pd_op.matmul: (-1x12x49x32xf32) <- (-1x12x49x49xf32, -1x12x49x32xf32) + matmul_82 = paddle._C_ops.matmul(softmax_13, slice_98, False, False) + del slice_98, softmax_13 + + # pd_op.transpose: (-1x49x12x32xf32) <- (-1x12x49x32xf32) + transpose_90 = paddle._C_ops.transpose(matmul_82, [0, 2, 1, 3]) + del matmul_82 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_90 = [slice_95, full_4, full_17] + del slice_95 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_88 = paddle._C_ops.stack(combine_90, 0) + del combine_90 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x49x12x32xf32, 3xi64) + reshape_205 = paddle._C_ops.reshape(transpose_90, stack_88) + del stack_88, transpose_90 + + # pd_op.matmul: (-1x49x384xf32) <- (-1x49x384xf32, 384x384xf32) + matmul_83 = paddle._C_ops.matmul(reshape_205, parameter_134, False, False) + del parameter_134, reshape_205 + + # pd_op.add: (-1x49x384xf32) <- (-1x49x384xf32, 384xf32) + add_102 = paddle._C_ops.add(matmul_83, parameter_133) + del matmul_83, parameter_133 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x49x384xf32, 4xi64) + reshape_206 = paddle._C_ops.reshape(add_102, full_int_array_38) + del add_102 + + # pd_op.reshape: (-1x2x2x7x7x384xf32) <- (-1x7x7x384xf32, 6xi64) + reshape_207 = paddle._C_ops.reshape(reshape_206, full_int_array_40) + del reshape_206 + + # pd_op.transpose: (-1x2x7x2x7x384xf32) <- (-1x2x2x7x7x384xf32) + transpose_91 = paddle._C_ops.transpose(reshape_207, [0, 1, 3, 2, 4, 5]) + del reshape_207 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x2x7x2x7x384xf32, 4xi64) + reshape_208 = paddle._C_ops.reshape(transpose_91, full_int_array_41) + del transpose_91 + + # pd_op.roll: (-1x14x14x384xf32) <- (-1x14x14x384xf32, 2xi64) + roll_13 = paddle._C_ops.roll(reshape_208, full_int_array_29, [1, 2]) + del reshape_208 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_91 = [slice_92, full_32, full_17] + del slice_92 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_89 = paddle._C_ops.stack(combine_91, 0) + del combine_91 + + # pd_op.reshape: (-1x196x384xf32) <- (-1x14x14x384xf32, 3xi64) + reshape_209 = paddle._C_ops.reshape(roll_13, stack_89) + del roll_13, stack_89 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_103 = paddle._C_ops.add(add_98, reshape_209) + del add_98, reshape_209 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_90, layer_norm_91, layer_norm_92 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_103, parameter_132, parameter_131, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_131, parameter_132 + + # pd_op.matmul: (-1x196x1536xf32) <- (-1x196x384xf32, 384x1536xf32) + matmul_84 = paddle._C_ops.matmul(layer_norm_90, parameter_130, False, False) + del layer_norm_90, parameter_130 + + # pd_op.add: (-1x196x1536xf32) <- (-1x196x1536xf32, 1536xf32) + add_104 = paddle._C_ops.add(matmul_84, parameter_129) + del matmul_84, parameter_129 + + # pd_op.gelu: (-1x196x1536xf32) <- (-1x196x1536xf32) + gelu_13 = paddle._C_ops.gelu(add_104, False) + del add_104 + + # pd_op.matmul: (-1x196x384xf32) <- (-1x196x1536xf32, 1536x384xf32) + matmul_85 = paddle._C_ops.matmul(gelu_13, parameter_128, False, False) + del gelu_13, parameter_128 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, 384xf32) + add_105 = paddle._C_ops.add(matmul_85, parameter_127) + del matmul_85, parameter_127 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_106 = paddle._C_ops.add(add_103, add_105) + del add_103, add_105 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_57 = paddle._C_ops.shape64(add_106) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_99 = paddle._C_ops.slice( + shape64_57, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_57 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_93, layer_norm_94, layer_norm_95 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_106, parameter_126, parameter_125, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_125, parameter_126 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_92 = [slice_99, full_29, full_29, full_17] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_90 = paddle._C_ops.stack(combine_92, 0) + del combine_92 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x196x384xf32, 4xi64) + reshape_210 = paddle._C_ops.reshape(layer_norm_93, stack_90) + del layer_norm_93, stack_90 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_58 = paddle._C_ops.shape64(reshape_210) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_100 = paddle._C_ops.slice( + shape64_58, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_58 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_93 = [slice_100, full_30, full_3, full_30, full_3, full_17] + del slice_100 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_91 = paddle._C_ops.stack(combine_93, 0) + del combine_93 + + # pd_op.reshape: (-1x2x7x2x7x384xf32) <- (-1x14x14x384xf32, 6xi64) + reshape_211 = paddle._C_ops.reshape(reshape_210, stack_91) + del reshape_210, stack_91 + + # pd_op.transpose: (-1x2x2x7x7x384xf32) <- (-1x2x7x2x7x384xf32) + transpose_92 = paddle._C_ops.transpose(reshape_211, [0, 1, 3, 2, 4, 5]) + del reshape_211 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x2x2x7x7x384xf32, 4xi64) + reshape_212 = paddle._C_ops.reshape(transpose_92, full_int_array_38) + del transpose_92 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x7x7x384xf32, 3xi64) + reshape_213 = paddle._C_ops.reshape(reshape_212, full_int_array_39) + del reshape_212 + + # pd_op.shape64: (3xi64) <- (-1x49x384xf32) + shape64_59 = paddle._C_ops.shape64(reshape_213) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_101 = paddle._C_ops.slice( + shape64_59, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_59 + + # pd_op.matmul: (-1x49x1152xf32) <- (-1x49x384xf32, 384x1152xf32) + matmul_86 = paddle._C_ops.matmul(reshape_213, parameter_124, False, False) + del parameter_124, reshape_213 + + # pd_op.add: (-1x49x1152xf32) <- (-1x49x1152xf32, 1152xf32) + add_107 = paddle._C_ops.add(matmul_86, parameter_123) + del matmul_86, parameter_123 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_94 = [slice_101, full_4, full_5, full_31, full_6] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_92 = paddle._C_ops.stack(combine_94, 0) + del combine_94 + + # pd_op.reshape: (-1x49x3x12x32xf32) <- (-1x49x1152xf32, 5xi64) + reshape_214 = paddle._C_ops.reshape(add_107, stack_92) + del add_107, stack_92 + + # pd_op.transpose: (3x-1x12x49x32xf32) <- (-1x49x3x12x32xf32) + transpose_93 = paddle._C_ops.transpose(reshape_214, [2, 0, 3, 1, 4]) + del reshape_214 + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_102 = paddle._C_ops.slice( + transpose_93, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_103 = paddle._C_ops.slice( + transpose_93, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_104 = paddle._C_ops.slice( + transpose_93, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_93 + + # pd_op.scale: (-1x12x49x32xf32) <- (-1x12x49x32xf32, 1xf32) + scale_14 = paddle._C_ops.scale(slice_102, full_7, float("0"), True) + del slice_102 + + # pd_op.transpose: (-1x12x32x49xf32) <- (-1x12x49x32xf32) + transpose_94 = paddle._C_ops.transpose(slice_103, [0, 1, 3, 2]) + del slice_103 + + # pd_op.matmul: (-1x12x49x49xf32) <- (-1x12x49x32xf32, -1x12x32x49xf32) + matmul_87 = paddle._C_ops.matmul(scale_14, transpose_94, False, False) + del scale_14, transpose_94 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_215 = paddle._C_ops.reshape(data_29, full_int_array_7) + del data_29 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_14 = paddle._C_ops.index_select(data_30, reshape_215, 0) + del data_30, reshape_215 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_216 = paddle._C_ops.reshape(index_select_14, full_int_array_8) + del index_select_14 + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_95 = paddle._C_ops.transpose(reshape_216, [2, 0, 1]) + del reshape_216 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_42 = paddle._C_ops.unsqueeze(transpose_95, full_int_array_0) + del transpose_95 + + # pd_op.add: (-1x12x49x49xf32) <- (-1x12x49x49xf32, 1x12x49x49xf32) + add_108 = paddle._C_ops.add(matmul_87, unsqueeze_42) + del matmul_87, unsqueeze_42 + + # pd_op.softmax: (-1x12x49x49xf32) <- (-1x12x49x49xf32) + softmax_14 = paddle._C_ops.softmax(add_108, -1) + del add_108 + + # pd_op.matmul: (-1x12x49x32xf32) <- (-1x12x49x49xf32, -1x12x49x32xf32) + matmul_88 = paddle._C_ops.matmul(softmax_14, slice_104, False, False) + del slice_104, softmax_14 + + # pd_op.transpose: (-1x49x12x32xf32) <- (-1x12x49x32xf32) + transpose_96 = paddle._C_ops.transpose(matmul_88, [0, 2, 1, 3]) + del matmul_88 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_95 = [slice_101, full_4, full_17] + del slice_101 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_93 = paddle._C_ops.stack(combine_95, 0) + del combine_95 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x49x12x32xf32, 3xi64) + reshape_217 = paddle._C_ops.reshape(transpose_96, stack_93) + del stack_93, transpose_96 + + # pd_op.matmul: (-1x49x384xf32) <- (-1x49x384xf32, 384x384xf32) + matmul_89 = paddle._C_ops.matmul(reshape_217, parameter_122, False, False) + del parameter_122, reshape_217 + + # pd_op.add: (-1x49x384xf32) <- (-1x49x384xf32, 384xf32) + add_109 = paddle._C_ops.add(matmul_89, parameter_121) + del matmul_89, parameter_121 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x49x384xf32, 4xi64) + reshape_218 = paddle._C_ops.reshape(add_109, full_int_array_38) + del add_109 + + # pd_op.reshape: (-1x2x2x7x7x384xf32) <- (-1x7x7x384xf32, 6xi64) + reshape_219 = paddle._C_ops.reshape(reshape_218, full_int_array_40) + del reshape_218 + + # pd_op.transpose: (-1x2x7x2x7x384xf32) <- (-1x2x2x7x7x384xf32) + transpose_97 = paddle._C_ops.transpose(reshape_219, [0, 1, 3, 2, 4, 5]) + del reshape_219 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x2x7x2x7x384xf32, 4xi64) + reshape_220 = paddle._C_ops.reshape(transpose_97, full_int_array_41) + del transpose_97 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_96 = [slice_99, full_32, full_17] + del slice_99 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_94 = paddle._C_ops.stack(combine_96, 0) + del combine_96 + + # pd_op.reshape: (-1x196x384xf32) <- (-1x14x14x384xf32, 3xi64) + reshape_221 = paddle._C_ops.reshape(reshape_220, stack_94) + del reshape_220, stack_94 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_110 = paddle._C_ops.add(add_106, reshape_221) + del add_106, reshape_221 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_96, layer_norm_97, layer_norm_98 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_110, parameter_120, parameter_119, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_119, parameter_120 + + # pd_op.matmul: (-1x196x1536xf32) <- (-1x196x384xf32, 384x1536xf32) + matmul_90 = paddle._C_ops.matmul(layer_norm_96, parameter_118, False, False) + del layer_norm_96, parameter_118 + + # pd_op.add: (-1x196x1536xf32) <- (-1x196x1536xf32, 1536xf32) + add_111 = paddle._C_ops.add(matmul_90, parameter_117) + del matmul_90, parameter_117 + + # pd_op.gelu: (-1x196x1536xf32) <- (-1x196x1536xf32) + gelu_14 = paddle._C_ops.gelu(add_111, False) + del add_111 + + # pd_op.matmul: (-1x196x384xf32) <- (-1x196x1536xf32, 1536x384xf32) + matmul_91 = paddle._C_ops.matmul(gelu_14, parameter_116, False, False) + del gelu_14, parameter_116 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, 384xf32) + add_112 = paddle._C_ops.add(matmul_91, parameter_115) + del matmul_91, parameter_115 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_113 = paddle._C_ops.add(add_110, add_112) + del add_110, add_112 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_60 = paddle._C_ops.shape64(add_113) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_105 = paddle._C_ops.slice( + shape64_60, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_60 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_99, layer_norm_100, layer_norm_101 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_113, parameter_114, parameter_113, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_113, parameter_114 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_97 = [slice_105, full_29, full_29, full_17] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_95 = paddle._C_ops.stack(combine_97, 0) + del combine_97 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x196x384xf32, 4xi64) + reshape_222 = paddle._C_ops.reshape(layer_norm_99, stack_95) + del layer_norm_99, stack_95 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_61 = paddle._C_ops.shape64(reshape_222) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_106 = paddle._C_ops.slice( + shape64_61, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_61 + + # pd_op.roll: (-1x14x14x384xf32) <- (-1x14x14x384xf32, 2xi64) + roll_14 = paddle._C_ops.roll(reshape_222, full_int_array_11, [1, 2]) + del reshape_222 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_62 = paddle._C_ops.shape64(roll_14) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_107 = paddle._C_ops.slice( + shape64_62, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_62 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_98 = [slice_107, full_30, full_3, full_30, full_3, full_17] + del slice_107 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_96 = paddle._C_ops.stack(combine_98, 0) + del combine_98 + + # pd_op.reshape: (-1x2x7x2x7x384xf32) <- (-1x14x14x384xf32, 6xi64) + reshape_223 = paddle._C_ops.reshape(roll_14, stack_96) + del roll_14, stack_96 + + # pd_op.transpose: (-1x2x2x7x7x384xf32) <- (-1x2x7x2x7x384xf32) + transpose_98 = paddle._C_ops.transpose(reshape_223, [0, 1, 3, 2, 4, 5]) + del reshape_223 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x2x2x7x7x384xf32, 4xi64) + reshape_224 = paddle._C_ops.reshape(transpose_98, full_int_array_38) + del transpose_98 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x7x7x384xf32, 3xi64) + reshape_225 = paddle._C_ops.reshape(reshape_224, full_int_array_39) + del reshape_224 + + # pd_op.full: (1x14x14x1xf32) <- () + full_41 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__68 = paddle._C_ops.set_value_( + full_41, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_41 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__69 = paddle._C_ops.set_value_( + set_value__68, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__68 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__70 = paddle._C_ops.set_value_( + set_value__69, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__69 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__71 = paddle._C_ops.set_value_( + set_value__70, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__70 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__72 = paddle._C_ops.set_value_( + set_value__71, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__71 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__73 = paddle._C_ops.set_value_( + set_value__72, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__72 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__74 = paddle._C_ops.set_value_( + set_value__73, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__73 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__75 = paddle._C_ops.set_value_( + set_value__74, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__74 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__7 = paddle._C_ops.set_value_( + set_value__75, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__75 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_226 = paddle._C_ops.reshape(set_value__7, full_int_array_42) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_99 = paddle._C_ops.transpose(reshape_226, [0, 1, 3, 2, 4, 5]) + del reshape_226 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_227 = paddle._C_ops.reshape(transpose_99, full_int_array_27) + del transpose_99 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_228 = paddle._C_ops.reshape(reshape_227, full_int_array_28) + del reshape_227 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_43 = paddle._C_ops.unsqueeze(reshape_228, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_44 = paddle._C_ops.unsqueeze(reshape_228, full_int_array_5) + del reshape_228 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_7 = paddle._C_ops.subtract(unsqueeze_43, unsqueeze_44) + del unsqueeze_43, unsqueeze_44 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_7 = paddle._C_ops.not_equal(subtract_7, full_10) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_14 = paddle._C_ops.where(not_equal_7, full_34, subtract_7) + del not_equal_7, subtract_7 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_7 = paddle._C_ops.equal(where_14, full_10) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_15 = paddle._C_ops.where(equal_7, full_35, where_14) + del equal_7, where_14 + + # pd_op.shape64: (3xi64) <- (-1x49x384xf32) + shape64_63 = paddle._C_ops.shape64(reshape_225) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_108 = paddle._C_ops.slice( + shape64_63, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_63 + + # pd_op.matmul: (-1x49x1152xf32) <- (-1x49x384xf32, 384x1152xf32) + matmul_92 = paddle._C_ops.matmul(reshape_225, parameter_112, False, False) + del parameter_112, reshape_225 + + # pd_op.add: (-1x49x1152xf32) <- (-1x49x1152xf32, 1152xf32) + add_114 = paddle._C_ops.add(matmul_92, parameter_111) + del matmul_92, parameter_111 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_99 = [slice_108, full_4, full_5, full_31, full_6] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_97 = paddle._C_ops.stack(combine_99, 0) + del combine_99 + + # pd_op.reshape: (-1x49x3x12x32xf32) <- (-1x49x1152xf32, 5xi64) + reshape_229 = paddle._C_ops.reshape(add_114, stack_97) + del add_114, stack_97 + + # pd_op.transpose: (3x-1x12x49x32xf32) <- (-1x49x3x12x32xf32) + transpose_100 = paddle._C_ops.transpose(reshape_229, [2, 0, 3, 1, 4]) + del reshape_229 + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_109 = paddle._C_ops.slice( + transpose_100, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_110 = paddle._C_ops.slice( + transpose_100, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_111 = paddle._C_ops.slice( + transpose_100, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_100 + + # pd_op.scale: (-1x12x49x32xf32) <- (-1x12x49x32xf32, 1xf32) + scale_15 = paddle._C_ops.scale(slice_109, full_7, float("0"), True) + del slice_109 + + # pd_op.transpose: (-1x12x32x49xf32) <- (-1x12x49x32xf32) + transpose_101 = paddle._C_ops.transpose(slice_110, [0, 1, 3, 2]) + del slice_110 + + # pd_op.matmul: (-1x12x49x49xf32) <- (-1x12x49x32xf32, -1x12x32x49xf32) + matmul_93 = paddle._C_ops.matmul(scale_15, transpose_101, False, False) + del scale_15, transpose_101 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_230 = paddle._C_ops.reshape(data_31, full_int_array_7) + del data_31 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_15 = paddle._C_ops.index_select(data_32, reshape_230, 0) + del data_32, reshape_230 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_231 = paddle._C_ops.reshape(index_select_15, full_int_array_8) + del index_select_15 + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_102 = paddle._C_ops.transpose(reshape_231, [2, 0, 1]) + del reshape_231 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_45 = paddle._C_ops.unsqueeze(transpose_102, full_int_array_0) + del transpose_102 + + # pd_op.add: (-1x12x49x49xf32) <- (-1x12x49x49xf32, 1x12x49x49xf32) + add_115 = paddle._C_ops.add(matmul_93, unsqueeze_45) + del matmul_93, unsqueeze_45 + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_7 = paddle._C_ops.floor_divide(slice_108, full_36) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_100 = [floor_divide_7, full_20, full_31, full_4, full_4] + del floor_divide_7 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_98 = paddle._C_ops.stack(combine_100, 0) + del combine_100 + + # pd_op.reshape: (-1x4x12x49x49xf32) <- (-1x12x49x49xf32, 5xi64) + reshape_232 = paddle._C_ops.reshape(add_115, stack_98) + del add_115, stack_98 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_46 = paddle._C_ops.unsqueeze(where_15, full_int_array_1) + del where_15 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_47 = paddle._C_ops.unsqueeze(unsqueeze_46, full_int_array_0) + del unsqueeze_46 + + # pd_op.add: (-1x4x12x49x49xf32) <- (-1x4x12x49x49xf32, 1x4x1x49x49xf32) + add_116 = paddle._C_ops.add(reshape_232, unsqueeze_47) + del reshape_232, unsqueeze_47 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_101 = [slice_108, full_31, full_4, full_4] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_99 = paddle._C_ops.stack(combine_101, 0) + del combine_101 + + # pd_op.reshape: (-1x12x49x49xf32) <- (-1x4x12x49x49xf32, 4xi64) + reshape_233 = paddle._C_ops.reshape(add_116, stack_99) + del add_116, stack_99 + + # pd_op.softmax: (-1x12x49x49xf32) <- (-1x12x49x49xf32) + softmax_15 = paddle._C_ops.softmax(reshape_233, -1) + del reshape_233 + + # pd_op.matmul: (-1x12x49x32xf32) <- (-1x12x49x49xf32, -1x12x49x32xf32) + matmul_94 = paddle._C_ops.matmul(softmax_15, slice_111, False, False) + del slice_111, softmax_15 + + # pd_op.transpose: (-1x49x12x32xf32) <- (-1x12x49x32xf32) + transpose_103 = paddle._C_ops.transpose(matmul_94, [0, 2, 1, 3]) + del matmul_94 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_102 = [slice_108, full_4, full_17] + del slice_108 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_100 = paddle._C_ops.stack(combine_102, 0) + del combine_102 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x49x12x32xf32, 3xi64) + reshape_234 = paddle._C_ops.reshape(transpose_103, stack_100) + del stack_100, transpose_103 + + # pd_op.matmul: (-1x49x384xf32) <- (-1x49x384xf32, 384x384xf32) + matmul_95 = paddle._C_ops.matmul(reshape_234, parameter_110, False, False) + del parameter_110, reshape_234 + + # pd_op.add: (-1x49x384xf32) <- (-1x49x384xf32, 384xf32) + add_117 = paddle._C_ops.add(matmul_95, parameter_109) + del matmul_95, parameter_109 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x49x384xf32, 4xi64) + reshape_235 = paddle._C_ops.reshape(add_117, full_int_array_38) + del add_117 + + # pd_op.reshape: (-1x2x2x7x7x384xf32) <- (-1x7x7x384xf32, 6xi64) + reshape_236 = paddle._C_ops.reshape(reshape_235, full_int_array_40) + del reshape_235 + + # pd_op.transpose: (-1x2x7x2x7x384xf32) <- (-1x2x2x7x7x384xf32) + transpose_104 = paddle._C_ops.transpose(reshape_236, [0, 1, 3, 2, 4, 5]) + del reshape_236 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x2x7x2x7x384xf32, 4xi64) + reshape_237 = paddle._C_ops.reshape(transpose_104, full_int_array_41) + del transpose_104 + + # pd_op.roll: (-1x14x14x384xf32) <- (-1x14x14x384xf32, 2xi64) + roll_15 = paddle._C_ops.roll(reshape_237, full_int_array_29, [1, 2]) + del reshape_237 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_103 = [slice_105, full_32, full_17] + del slice_105 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_101 = paddle._C_ops.stack(combine_103, 0) + del combine_103 + + # pd_op.reshape: (-1x196x384xf32) <- (-1x14x14x384xf32, 3xi64) + reshape_238 = paddle._C_ops.reshape(roll_15, stack_101) + del roll_15, stack_101 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_118 = paddle._C_ops.add(add_113, reshape_238) + del add_113, reshape_238 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_102, layer_norm_103, layer_norm_104 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_118, parameter_108, parameter_107, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_107, parameter_108 + + # pd_op.matmul: (-1x196x1536xf32) <- (-1x196x384xf32, 384x1536xf32) + matmul_96 = paddle._C_ops.matmul(layer_norm_102, parameter_106, False, False) + del layer_norm_102, parameter_106 + + # pd_op.add: (-1x196x1536xf32) <- (-1x196x1536xf32, 1536xf32) + add_119 = paddle._C_ops.add(matmul_96, parameter_105) + del matmul_96, parameter_105 + + # pd_op.gelu: (-1x196x1536xf32) <- (-1x196x1536xf32) + gelu_15 = paddle._C_ops.gelu(add_119, False) + del add_119 + + # pd_op.matmul: (-1x196x384xf32) <- (-1x196x1536xf32, 1536x384xf32) + matmul_97 = paddle._C_ops.matmul(gelu_15, parameter_104, False, False) + del gelu_15, parameter_104 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, 384xf32) + add_120 = paddle._C_ops.add(matmul_97, parameter_103) + del matmul_97, parameter_103 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_121 = paddle._C_ops.add(add_118, add_120) + del add_118, add_120 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_64 = paddle._C_ops.shape64(add_121) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_112 = paddle._C_ops.slice( + shape64_64, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_64 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_105, layer_norm_106, layer_norm_107 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_121, parameter_102, parameter_101, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_101, parameter_102 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_104 = [slice_112, full_29, full_29, full_17] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_102 = paddle._C_ops.stack(combine_104, 0) + del combine_104 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x196x384xf32, 4xi64) + reshape_239 = paddle._C_ops.reshape(layer_norm_105, stack_102) + del layer_norm_105, stack_102 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_65 = paddle._C_ops.shape64(reshape_239) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_113 = paddle._C_ops.slice( + shape64_65, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_65 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_105 = [slice_113, full_30, full_3, full_30, full_3, full_17] + del slice_113 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_103 = paddle._C_ops.stack(combine_105, 0) + del combine_105 + + # pd_op.reshape: (-1x2x7x2x7x384xf32) <- (-1x14x14x384xf32, 6xi64) + reshape_240 = paddle._C_ops.reshape(reshape_239, stack_103) + del reshape_239, stack_103 + + # pd_op.transpose: (-1x2x2x7x7x384xf32) <- (-1x2x7x2x7x384xf32) + transpose_105 = paddle._C_ops.transpose(reshape_240, [0, 1, 3, 2, 4, 5]) + del reshape_240 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x2x2x7x7x384xf32, 4xi64) + reshape_241 = paddle._C_ops.reshape(transpose_105, full_int_array_38) + del transpose_105 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x7x7x384xf32, 3xi64) + reshape_242 = paddle._C_ops.reshape(reshape_241, full_int_array_39) + del reshape_241 + + # pd_op.shape64: (3xi64) <- (-1x49x384xf32) + shape64_66 = paddle._C_ops.shape64(reshape_242) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_114 = paddle._C_ops.slice( + shape64_66, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_66 + + # pd_op.matmul: (-1x49x1152xf32) <- (-1x49x384xf32, 384x1152xf32) + matmul_98 = paddle._C_ops.matmul(reshape_242, parameter_100, False, False) + del parameter_100, reshape_242 + + # pd_op.add: (-1x49x1152xf32) <- (-1x49x1152xf32, 1152xf32) + add_122 = paddle._C_ops.add(matmul_98, parameter_99) + del matmul_98, parameter_99 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_106 = [slice_114, full_4, full_5, full_31, full_6] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_104 = paddle._C_ops.stack(combine_106, 0) + del combine_106 + + # pd_op.reshape: (-1x49x3x12x32xf32) <- (-1x49x1152xf32, 5xi64) + reshape_243 = paddle._C_ops.reshape(add_122, stack_104) + del add_122, stack_104 + + # pd_op.transpose: (3x-1x12x49x32xf32) <- (-1x49x3x12x32xf32) + transpose_106 = paddle._C_ops.transpose(reshape_243, [2, 0, 3, 1, 4]) + del reshape_243 + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_115 = paddle._C_ops.slice( + transpose_106, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_116 = paddle._C_ops.slice( + transpose_106, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_117 = paddle._C_ops.slice( + transpose_106, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_106 + + # pd_op.scale: (-1x12x49x32xf32) <- (-1x12x49x32xf32, 1xf32) + scale_16 = paddle._C_ops.scale(slice_115, full_7, float("0"), True) + del slice_115 + + # pd_op.transpose: (-1x12x32x49xf32) <- (-1x12x49x32xf32) + transpose_107 = paddle._C_ops.transpose(slice_116, [0, 1, 3, 2]) + del slice_116 + + # pd_op.matmul: (-1x12x49x49xf32) <- (-1x12x49x32xf32, -1x12x32x49xf32) + matmul_99 = paddle._C_ops.matmul(scale_16, transpose_107, False, False) + del scale_16, transpose_107 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_244 = paddle._C_ops.reshape(data_33, full_int_array_7) + del data_33 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_16 = paddle._C_ops.index_select(data_34, reshape_244, 0) + del data_34, reshape_244 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_245 = paddle._C_ops.reshape(index_select_16, full_int_array_8) + del index_select_16 + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_108 = paddle._C_ops.transpose(reshape_245, [2, 0, 1]) + del reshape_245 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_48 = paddle._C_ops.unsqueeze(transpose_108, full_int_array_0) + del transpose_108 + + # pd_op.add: (-1x12x49x49xf32) <- (-1x12x49x49xf32, 1x12x49x49xf32) + add_123 = paddle._C_ops.add(matmul_99, unsqueeze_48) + del matmul_99, unsqueeze_48 + + # pd_op.softmax: (-1x12x49x49xf32) <- (-1x12x49x49xf32) + softmax_16 = paddle._C_ops.softmax(add_123, -1) + del add_123 + + # pd_op.matmul: (-1x12x49x32xf32) <- (-1x12x49x49xf32, -1x12x49x32xf32) + matmul_100 = paddle._C_ops.matmul(softmax_16, slice_117, False, False) + del slice_117, softmax_16 + + # pd_op.transpose: (-1x49x12x32xf32) <- (-1x12x49x32xf32) + transpose_109 = paddle._C_ops.transpose(matmul_100, [0, 2, 1, 3]) + del matmul_100 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_107 = [slice_114, full_4, full_17] + del slice_114 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_105 = paddle._C_ops.stack(combine_107, 0) + del combine_107 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x49x12x32xf32, 3xi64) + reshape_246 = paddle._C_ops.reshape(transpose_109, stack_105) + del stack_105, transpose_109 + + # pd_op.matmul: (-1x49x384xf32) <- (-1x49x384xf32, 384x384xf32) + matmul_101 = paddle._C_ops.matmul(reshape_246, parameter_98, False, False) + del parameter_98, reshape_246 + + # pd_op.add: (-1x49x384xf32) <- (-1x49x384xf32, 384xf32) + add_124 = paddle._C_ops.add(matmul_101, parameter_97) + del matmul_101, parameter_97 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x49x384xf32, 4xi64) + reshape_247 = paddle._C_ops.reshape(add_124, full_int_array_38) + del add_124 + + # pd_op.reshape: (-1x2x2x7x7x384xf32) <- (-1x7x7x384xf32, 6xi64) + reshape_248 = paddle._C_ops.reshape(reshape_247, full_int_array_40) + del reshape_247 + + # pd_op.transpose: (-1x2x7x2x7x384xf32) <- (-1x2x2x7x7x384xf32) + transpose_110 = paddle._C_ops.transpose(reshape_248, [0, 1, 3, 2, 4, 5]) + del reshape_248 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x2x7x2x7x384xf32, 4xi64) + reshape_249 = paddle._C_ops.reshape(transpose_110, full_int_array_41) + del transpose_110 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_108 = [slice_112, full_32, full_17] + del slice_112 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_106 = paddle._C_ops.stack(combine_108, 0) + del combine_108 + + # pd_op.reshape: (-1x196x384xf32) <- (-1x14x14x384xf32, 3xi64) + reshape_250 = paddle._C_ops.reshape(reshape_249, stack_106) + del reshape_249, stack_106 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_125 = paddle._C_ops.add(add_121, reshape_250) + del add_121, reshape_250 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_108, layer_norm_109, layer_norm_110 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_125, parameter_96, parameter_95, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_95, parameter_96 + + # pd_op.matmul: (-1x196x1536xf32) <- (-1x196x384xf32, 384x1536xf32) + matmul_102 = paddle._C_ops.matmul(layer_norm_108, parameter_94, False, False) + del layer_norm_108, parameter_94 + + # pd_op.add: (-1x196x1536xf32) <- (-1x196x1536xf32, 1536xf32) + add_126 = paddle._C_ops.add(matmul_102, parameter_93) + del matmul_102, parameter_93 + + # pd_op.gelu: (-1x196x1536xf32) <- (-1x196x1536xf32) + gelu_16 = paddle._C_ops.gelu(add_126, False) + del add_126 + + # pd_op.matmul: (-1x196x384xf32) <- (-1x196x1536xf32, 1536x384xf32) + matmul_103 = paddle._C_ops.matmul(gelu_16, parameter_92, False, False) + del gelu_16, parameter_92 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, 384xf32) + add_127 = paddle._C_ops.add(matmul_103, parameter_91) + del matmul_103, parameter_91 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_128 = paddle._C_ops.add(add_125, add_127) + del add_125, add_127 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_67 = paddle._C_ops.shape64(add_128) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_118 = paddle._C_ops.slice( + shape64_67, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_67 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_111, layer_norm_112, layer_norm_113 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_128, parameter_90, parameter_89, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_89, parameter_90 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_109 = [slice_118, full_29, full_29, full_17] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_107 = paddle._C_ops.stack(combine_109, 0) + del combine_109 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x196x384xf32, 4xi64) + reshape_251 = paddle._C_ops.reshape(layer_norm_111, stack_107) + del layer_norm_111, stack_107 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_68 = paddle._C_ops.shape64(reshape_251) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_119 = paddle._C_ops.slice( + shape64_68, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_68 + + # pd_op.roll: (-1x14x14x384xf32) <- (-1x14x14x384xf32, 2xi64) + roll_16 = paddle._C_ops.roll(reshape_251, full_int_array_11, [1, 2]) + del reshape_251 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_69 = paddle._C_ops.shape64(roll_16) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_120 = paddle._C_ops.slice( + shape64_69, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_69 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_110 = [slice_120, full_30, full_3, full_30, full_3, full_17] + del slice_120 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_108 = paddle._C_ops.stack(combine_110, 0) + del combine_110 + + # pd_op.reshape: (-1x2x7x2x7x384xf32) <- (-1x14x14x384xf32, 6xi64) + reshape_252 = paddle._C_ops.reshape(roll_16, stack_108) + del roll_16, stack_108 + + # pd_op.transpose: (-1x2x2x7x7x384xf32) <- (-1x2x7x2x7x384xf32) + transpose_111 = paddle._C_ops.transpose(reshape_252, [0, 1, 3, 2, 4, 5]) + del reshape_252 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x2x2x7x7x384xf32, 4xi64) + reshape_253 = paddle._C_ops.reshape(transpose_111, full_int_array_38) + del transpose_111 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x7x7x384xf32, 3xi64) + reshape_254 = paddle._C_ops.reshape(reshape_253, full_int_array_39) + del reshape_253 + + # pd_op.full: (1x14x14x1xf32) <- () + full_42 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__76 = paddle._C_ops.set_value_( + full_42, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_42 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__77 = paddle._C_ops.set_value_( + set_value__76, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__76 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__78 = paddle._C_ops.set_value_( + set_value__77, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__77 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__79 = paddle._C_ops.set_value_( + set_value__78, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__78 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__80 = paddle._C_ops.set_value_( + set_value__79, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__79 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__81 = paddle._C_ops.set_value_( + set_value__80, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__80 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__82 = paddle._C_ops.set_value_( + set_value__81, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__81 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__83 = paddle._C_ops.set_value_( + set_value__82, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__82 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__8 = paddle._C_ops.set_value_( + set_value__83, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__83 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_255 = paddle._C_ops.reshape(set_value__8, full_int_array_42) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_112 = paddle._C_ops.transpose(reshape_255, [0, 1, 3, 2, 4, 5]) + del reshape_255 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_256 = paddle._C_ops.reshape(transpose_112, full_int_array_27) + del transpose_112 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_257 = paddle._C_ops.reshape(reshape_256, full_int_array_28) + del reshape_256 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_49 = paddle._C_ops.unsqueeze(reshape_257, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_50 = paddle._C_ops.unsqueeze(reshape_257, full_int_array_5) + del reshape_257 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_8 = paddle._C_ops.subtract(unsqueeze_49, unsqueeze_50) + del unsqueeze_49, unsqueeze_50 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_8 = paddle._C_ops.not_equal(subtract_8, full_10) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_16 = paddle._C_ops.where(not_equal_8, full_34, subtract_8) + del not_equal_8, subtract_8 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_8 = paddle._C_ops.equal(where_16, full_10) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_17 = paddle._C_ops.where(equal_8, full_35, where_16) + del equal_8, where_16 + + # pd_op.shape64: (3xi64) <- (-1x49x384xf32) + shape64_70 = paddle._C_ops.shape64(reshape_254) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_121 = paddle._C_ops.slice( + shape64_70, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_70 + + # pd_op.matmul: (-1x49x1152xf32) <- (-1x49x384xf32, 384x1152xf32) + matmul_104 = paddle._C_ops.matmul(reshape_254, parameter_88, False, False) + del parameter_88, reshape_254 + + # pd_op.add: (-1x49x1152xf32) <- (-1x49x1152xf32, 1152xf32) + add_129 = paddle._C_ops.add(matmul_104, parameter_87) + del matmul_104, parameter_87 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_111 = [slice_121, full_4, full_5, full_31, full_6] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_109 = paddle._C_ops.stack(combine_111, 0) + del combine_111 + + # pd_op.reshape: (-1x49x3x12x32xf32) <- (-1x49x1152xf32, 5xi64) + reshape_258 = paddle._C_ops.reshape(add_129, stack_109) + del add_129, stack_109 + + # pd_op.transpose: (3x-1x12x49x32xf32) <- (-1x49x3x12x32xf32) + transpose_113 = paddle._C_ops.transpose(reshape_258, [2, 0, 3, 1, 4]) + del reshape_258 + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_122 = paddle._C_ops.slice( + transpose_113, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_123 = paddle._C_ops.slice( + transpose_113, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_124 = paddle._C_ops.slice( + transpose_113, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_113 + + # pd_op.scale: (-1x12x49x32xf32) <- (-1x12x49x32xf32, 1xf32) + scale_17 = paddle._C_ops.scale(slice_122, full_7, float("0"), True) + del slice_122 + + # pd_op.transpose: (-1x12x32x49xf32) <- (-1x12x49x32xf32) + transpose_114 = paddle._C_ops.transpose(slice_123, [0, 1, 3, 2]) + del slice_123 + + # pd_op.matmul: (-1x12x49x49xf32) <- (-1x12x49x32xf32, -1x12x32x49xf32) + matmul_105 = paddle._C_ops.matmul(scale_17, transpose_114, False, False) + del scale_17, transpose_114 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_259 = paddle._C_ops.reshape(data_35, full_int_array_7) + del data_35 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_17 = paddle._C_ops.index_select(data_36, reshape_259, 0) + del data_36, reshape_259 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_260 = paddle._C_ops.reshape(index_select_17, full_int_array_8) + del index_select_17 + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_115 = paddle._C_ops.transpose(reshape_260, [2, 0, 1]) + del reshape_260 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_51 = paddle._C_ops.unsqueeze(transpose_115, full_int_array_0) + del transpose_115 + + # pd_op.add: (-1x12x49x49xf32) <- (-1x12x49x49xf32, 1x12x49x49xf32) + add_130 = paddle._C_ops.add(matmul_105, unsqueeze_51) + del matmul_105, unsqueeze_51 + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_8 = paddle._C_ops.floor_divide(slice_121, full_36) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_112 = [floor_divide_8, full_20, full_31, full_4, full_4] + del floor_divide_8 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_110 = paddle._C_ops.stack(combine_112, 0) + del combine_112 + + # pd_op.reshape: (-1x4x12x49x49xf32) <- (-1x12x49x49xf32, 5xi64) + reshape_261 = paddle._C_ops.reshape(add_130, stack_110) + del add_130, stack_110 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_52 = paddle._C_ops.unsqueeze(where_17, full_int_array_1) + del where_17 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_53 = paddle._C_ops.unsqueeze(unsqueeze_52, full_int_array_0) + del unsqueeze_52 + + # pd_op.add: (-1x4x12x49x49xf32) <- (-1x4x12x49x49xf32, 1x4x1x49x49xf32) + add_131 = paddle._C_ops.add(reshape_261, unsqueeze_53) + del reshape_261, unsqueeze_53 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_113 = [slice_121, full_31, full_4, full_4] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_111 = paddle._C_ops.stack(combine_113, 0) + del combine_113 + + # pd_op.reshape: (-1x12x49x49xf32) <- (-1x4x12x49x49xf32, 4xi64) + reshape_262 = paddle._C_ops.reshape(add_131, stack_111) + del add_131, stack_111 + + # pd_op.softmax: (-1x12x49x49xf32) <- (-1x12x49x49xf32) + softmax_17 = paddle._C_ops.softmax(reshape_262, -1) + del reshape_262 + + # pd_op.matmul: (-1x12x49x32xf32) <- (-1x12x49x49xf32, -1x12x49x32xf32) + matmul_106 = paddle._C_ops.matmul(softmax_17, slice_124, False, False) + del slice_124, softmax_17 + + # pd_op.transpose: (-1x49x12x32xf32) <- (-1x12x49x32xf32) + transpose_116 = paddle._C_ops.transpose(matmul_106, [0, 2, 1, 3]) + del matmul_106 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_114 = [slice_121, full_4, full_17] + del slice_121 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_112 = paddle._C_ops.stack(combine_114, 0) + del combine_114 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x49x12x32xf32, 3xi64) + reshape_263 = paddle._C_ops.reshape(transpose_116, stack_112) + del stack_112, transpose_116 + + # pd_op.matmul: (-1x49x384xf32) <- (-1x49x384xf32, 384x384xf32) + matmul_107 = paddle._C_ops.matmul(reshape_263, parameter_86, False, False) + del parameter_86, reshape_263 + + # pd_op.add: (-1x49x384xf32) <- (-1x49x384xf32, 384xf32) + add_132 = paddle._C_ops.add(matmul_107, parameter_85) + del matmul_107, parameter_85 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x49x384xf32, 4xi64) + reshape_264 = paddle._C_ops.reshape(add_132, full_int_array_38) + del add_132 + + # pd_op.reshape: (-1x2x2x7x7x384xf32) <- (-1x7x7x384xf32, 6xi64) + reshape_265 = paddle._C_ops.reshape(reshape_264, full_int_array_40) + del reshape_264 + + # pd_op.transpose: (-1x2x7x2x7x384xf32) <- (-1x2x2x7x7x384xf32) + transpose_117 = paddle._C_ops.transpose(reshape_265, [0, 1, 3, 2, 4, 5]) + del reshape_265 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x2x7x2x7x384xf32, 4xi64) + reshape_266 = paddle._C_ops.reshape(transpose_117, full_int_array_41) + del transpose_117 + + # pd_op.roll: (-1x14x14x384xf32) <- (-1x14x14x384xf32, 2xi64) + roll_17 = paddle._C_ops.roll(reshape_266, full_int_array_29, [1, 2]) + del reshape_266 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_115 = [slice_118, full_32, full_17] + del slice_118 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_113 = paddle._C_ops.stack(combine_115, 0) + del combine_115 + + # pd_op.reshape: (-1x196x384xf32) <- (-1x14x14x384xf32, 3xi64) + reshape_267 = paddle._C_ops.reshape(roll_17, stack_113) + del roll_17, stack_113 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_133 = paddle._C_ops.add(add_128, reshape_267) + del add_128, reshape_267 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_114, layer_norm_115, layer_norm_116 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_133, parameter_84, parameter_83, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_83, parameter_84 + + # pd_op.matmul: (-1x196x1536xf32) <- (-1x196x384xf32, 384x1536xf32) + matmul_108 = paddle._C_ops.matmul(layer_norm_114, parameter_82, False, False) + del layer_norm_114, parameter_82 + + # pd_op.add: (-1x196x1536xf32) <- (-1x196x1536xf32, 1536xf32) + add_134 = paddle._C_ops.add(matmul_108, parameter_81) + del matmul_108, parameter_81 + + # pd_op.gelu: (-1x196x1536xf32) <- (-1x196x1536xf32) + gelu_17 = paddle._C_ops.gelu(add_134, False) + del add_134 + + # pd_op.matmul: (-1x196x384xf32) <- (-1x196x1536xf32, 1536x384xf32) + matmul_109 = paddle._C_ops.matmul(gelu_17, parameter_80, False, False) + del gelu_17, parameter_80 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, 384xf32) + add_135 = paddle._C_ops.add(matmul_109, parameter_79) + del matmul_109, parameter_79 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_136 = paddle._C_ops.add(add_133, add_135) + del add_133, add_135 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_71 = paddle._C_ops.shape64(add_136) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_125 = paddle._C_ops.slice( + shape64_71, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_71 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_117, layer_norm_118, layer_norm_119 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_136, parameter_78, parameter_77, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_77, parameter_78 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_116 = [slice_125, full_29, full_29, full_17] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_114 = paddle._C_ops.stack(combine_116, 0) + del combine_116 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x196x384xf32, 4xi64) + reshape_268 = paddle._C_ops.reshape(layer_norm_117, stack_114) + del layer_norm_117, stack_114 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_72 = paddle._C_ops.shape64(reshape_268) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_126 = paddle._C_ops.slice( + shape64_72, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_72 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_117 = [slice_126, full_30, full_3, full_30, full_3, full_17] + del slice_126 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_115 = paddle._C_ops.stack(combine_117, 0) + del combine_117 + + # pd_op.reshape: (-1x2x7x2x7x384xf32) <- (-1x14x14x384xf32, 6xi64) + reshape_269 = paddle._C_ops.reshape(reshape_268, stack_115) + del reshape_268, stack_115 + + # pd_op.transpose: (-1x2x2x7x7x384xf32) <- (-1x2x7x2x7x384xf32) + transpose_118 = paddle._C_ops.transpose(reshape_269, [0, 1, 3, 2, 4, 5]) + del reshape_269 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x2x2x7x7x384xf32, 4xi64) + reshape_270 = paddle._C_ops.reshape(transpose_118, full_int_array_38) + del transpose_118 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x7x7x384xf32, 3xi64) + reshape_271 = paddle._C_ops.reshape(reshape_270, full_int_array_39) + del reshape_270 + + # pd_op.shape64: (3xi64) <- (-1x49x384xf32) + shape64_73 = paddle._C_ops.shape64(reshape_271) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_127 = paddle._C_ops.slice( + shape64_73, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_73 + + # pd_op.matmul: (-1x49x1152xf32) <- (-1x49x384xf32, 384x1152xf32) + matmul_110 = paddle._C_ops.matmul(reshape_271, parameter_76, False, False) + del parameter_76, reshape_271 + + # pd_op.add: (-1x49x1152xf32) <- (-1x49x1152xf32, 1152xf32) + add_137 = paddle._C_ops.add(matmul_110, parameter_75) + del matmul_110, parameter_75 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_118 = [slice_127, full_4, full_5, full_31, full_6] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_116 = paddle._C_ops.stack(combine_118, 0) + del combine_118 + + # pd_op.reshape: (-1x49x3x12x32xf32) <- (-1x49x1152xf32, 5xi64) + reshape_272 = paddle._C_ops.reshape(add_137, stack_116) + del add_137, stack_116 + + # pd_op.transpose: (3x-1x12x49x32xf32) <- (-1x49x3x12x32xf32) + transpose_119 = paddle._C_ops.transpose(reshape_272, [2, 0, 3, 1, 4]) + del reshape_272 + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_128 = paddle._C_ops.slice( + transpose_119, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_129 = paddle._C_ops.slice( + transpose_119, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_130 = paddle._C_ops.slice( + transpose_119, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_119 + + # pd_op.scale: (-1x12x49x32xf32) <- (-1x12x49x32xf32, 1xf32) + scale_18 = paddle._C_ops.scale(slice_128, full_7, float("0"), True) + del slice_128 + + # pd_op.transpose: (-1x12x32x49xf32) <- (-1x12x49x32xf32) + transpose_120 = paddle._C_ops.transpose(slice_129, [0, 1, 3, 2]) + del slice_129 + + # pd_op.matmul: (-1x12x49x49xf32) <- (-1x12x49x32xf32, -1x12x32x49xf32) + matmul_111 = paddle._C_ops.matmul(scale_18, transpose_120, False, False) + del scale_18, transpose_120 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_273 = paddle._C_ops.reshape(data_37, full_int_array_7) + del data_37 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_18 = paddle._C_ops.index_select(data_38, reshape_273, 0) + del data_38, reshape_273 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_274 = paddle._C_ops.reshape(index_select_18, full_int_array_8) + del index_select_18 + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_121 = paddle._C_ops.transpose(reshape_274, [2, 0, 1]) + del reshape_274 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_54 = paddle._C_ops.unsqueeze(transpose_121, full_int_array_0) + del transpose_121 + + # pd_op.add: (-1x12x49x49xf32) <- (-1x12x49x49xf32, 1x12x49x49xf32) + add_138 = paddle._C_ops.add(matmul_111, unsqueeze_54) + del matmul_111, unsqueeze_54 + + # pd_op.softmax: (-1x12x49x49xf32) <- (-1x12x49x49xf32) + softmax_18 = paddle._C_ops.softmax(add_138, -1) + del add_138 + + # pd_op.matmul: (-1x12x49x32xf32) <- (-1x12x49x49xf32, -1x12x49x32xf32) + matmul_112 = paddle._C_ops.matmul(softmax_18, slice_130, False, False) + del slice_130, softmax_18 + + # pd_op.transpose: (-1x49x12x32xf32) <- (-1x12x49x32xf32) + transpose_122 = paddle._C_ops.transpose(matmul_112, [0, 2, 1, 3]) + del matmul_112 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_119 = [slice_127, full_4, full_17] + del slice_127 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_117 = paddle._C_ops.stack(combine_119, 0) + del combine_119 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x49x12x32xf32, 3xi64) + reshape_275 = paddle._C_ops.reshape(transpose_122, stack_117) + del stack_117, transpose_122 + + # pd_op.matmul: (-1x49x384xf32) <- (-1x49x384xf32, 384x384xf32) + matmul_113 = paddle._C_ops.matmul(reshape_275, parameter_74, False, False) + del parameter_74, reshape_275 + + # pd_op.add: (-1x49x384xf32) <- (-1x49x384xf32, 384xf32) + add_139 = paddle._C_ops.add(matmul_113, parameter_73) + del matmul_113, parameter_73 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x49x384xf32, 4xi64) + reshape_276 = paddle._C_ops.reshape(add_139, full_int_array_38) + del add_139 + + # pd_op.reshape: (-1x2x2x7x7x384xf32) <- (-1x7x7x384xf32, 6xi64) + reshape_277 = paddle._C_ops.reshape(reshape_276, full_int_array_40) + del reshape_276 + + # pd_op.transpose: (-1x2x7x2x7x384xf32) <- (-1x2x2x7x7x384xf32) + transpose_123 = paddle._C_ops.transpose(reshape_277, [0, 1, 3, 2, 4, 5]) + del reshape_277 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x2x7x2x7x384xf32, 4xi64) + reshape_278 = paddle._C_ops.reshape(transpose_123, full_int_array_41) + del transpose_123 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_120 = [slice_125, full_32, full_17] + del slice_125 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_118 = paddle._C_ops.stack(combine_120, 0) + del combine_120 + + # pd_op.reshape: (-1x196x384xf32) <- (-1x14x14x384xf32, 3xi64) + reshape_279 = paddle._C_ops.reshape(reshape_278, stack_118) + del reshape_278, stack_118 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_140 = paddle._C_ops.add(add_136, reshape_279) + del add_136, reshape_279 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_120, layer_norm_121, layer_norm_122 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_140, parameter_72, parameter_71, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_71, parameter_72 + + # pd_op.matmul: (-1x196x1536xf32) <- (-1x196x384xf32, 384x1536xf32) + matmul_114 = paddle._C_ops.matmul(layer_norm_120, parameter_70, False, False) + del layer_norm_120, parameter_70 + + # pd_op.add: (-1x196x1536xf32) <- (-1x196x1536xf32, 1536xf32) + add_141 = paddle._C_ops.add(matmul_114, parameter_69) + del matmul_114, parameter_69 + + # pd_op.gelu: (-1x196x1536xf32) <- (-1x196x1536xf32) + gelu_18 = paddle._C_ops.gelu(add_141, False) + del add_141 + + # pd_op.matmul: (-1x196x384xf32) <- (-1x196x1536xf32, 1536x384xf32) + matmul_115 = paddle._C_ops.matmul(gelu_18, parameter_68, False, False) + del gelu_18, parameter_68 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, 384xf32) + add_142 = paddle._C_ops.add(matmul_115, parameter_67) + del matmul_115, parameter_67 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_143 = paddle._C_ops.add(add_140, add_142) + del add_140, add_142 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_74 = paddle._C_ops.shape64(add_143) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_131 = paddle._C_ops.slice( + shape64_74, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_74 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_123, layer_norm_124, layer_norm_125 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_143, parameter_66, parameter_65, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_65, parameter_66 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_121 = [slice_131, full_29, full_29, full_17] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_119 = paddle._C_ops.stack(combine_121, 0) + del combine_121 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x196x384xf32, 4xi64) + reshape_280 = paddle._C_ops.reshape(layer_norm_123, stack_119) + del layer_norm_123, stack_119 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_75 = paddle._C_ops.shape64(reshape_280) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_132 = paddle._C_ops.slice( + shape64_75, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_75 + + # pd_op.roll: (-1x14x14x384xf32) <- (-1x14x14x384xf32, 2xi64) + roll_18 = paddle._C_ops.roll(reshape_280, full_int_array_11, [1, 2]) + del reshape_280 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_76 = paddle._C_ops.shape64(roll_18) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_133 = paddle._C_ops.slice( + shape64_76, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_76 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_122 = [slice_133, full_30, full_3, full_30, full_3, full_17] + del slice_133 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_120 = paddle._C_ops.stack(combine_122, 0) + del combine_122 + + # pd_op.reshape: (-1x2x7x2x7x384xf32) <- (-1x14x14x384xf32, 6xi64) + reshape_281 = paddle._C_ops.reshape(roll_18, stack_120) + del roll_18, stack_120 + + # pd_op.transpose: (-1x2x2x7x7x384xf32) <- (-1x2x7x2x7x384xf32) + transpose_124 = paddle._C_ops.transpose(reshape_281, [0, 1, 3, 2, 4, 5]) + del reshape_281 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x2x2x7x7x384xf32, 4xi64) + reshape_282 = paddle._C_ops.reshape(transpose_124, full_int_array_38) + del transpose_124 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x7x7x384xf32, 3xi64) + reshape_283 = paddle._C_ops.reshape(reshape_282, full_int_array_39) + del reshape_282 + + # pd_op.full: (1x14x14x1xf32) <- () + full_43 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__84 = paddle._C_ops.set_value_( + full_43, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_43 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__85 = paddle._C_ops.set_value_( + set_value__84, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__84 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__86 = paddle._C_ops.set_value_( + set_value__85, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__85 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__87 = paddle._C_ops.set_value_( + set_value__86, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__86 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__88 = paddle._C_ops.set_value_( + set_value__87, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__87 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__89 = paddle._C_ops.set_value_( + set_value__88, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__88 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__90 = paddle._C_ops.set_value_( + set_value__89, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__89 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__91 = paddle._C_ops.set_value_( + set_value__90, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__90 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__9 = paddle._C_ops.set_value_( + set_value__91, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__91 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_284 = paddle._C_ops.reshape(set_value__9, full_int_array_42) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_125 = paddle._C_ops.transpose(reshape_284, [0, 1, 3, 2, 4, 5]) + del reshape_284 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_285 = paddle._C_ops.reshape(transpose_125, full_int_array_27) + del transpose_125 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_286 = paddle._C_ops.reshape(reshape_285, full_int_array_28) + del reshape_285 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_55 = paddle._C_ops.unsqueeze(reshape_286, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_56 = paddle._C_ops.unsqueeze(reshape_286, full_int_array_5) + del reshape_286 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_9 = paddle._C_ops.subtract(unsqueeze_55, unsqueeze_56) + del unsqueeze_55, unsqueeze_56 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_9 = paddle._C_ops.not_equal(subtract_9, full_10) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_18 = paddle._C_ops.where(not_equal_9, full_34, subtract_9) + del not_equal_9, subtract_9 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_9 = paddle._C_ops.equal(where_18, full_10) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_19 = paddle._C_ops.where(equal_9, full_35, where_18) + del equal_9, where_18 + + # pd_op.shape64: (3xi64) <- (-1x49x384xf32) + shape64_77 = paddle._C_ops.shape64(reshape_283) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_134 = paddle._C_ops.slice( + shape64_77, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_77 + + # pd_op.matmul: (-1x49x1152xf32) <- (-1x49x384xf32, 384x1152xf32) + matmul_116 = paddle._C_ops.matmul(reshape_283, parameter_64, False, False) + del parameter_64, reshape_283 + + # pd_op.add: (-1x49x1152xf32) <- (-1x49x1152xf32, 1152xf32) + add_144 = paddle._C_ops.add(matmul_116, parameter_63) + del matmul_116, parameter_63 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_123 = [slice_134, full_4, full_5, full_31, full_6] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_121 = paddle._C_ops.stack(combine_123, 0) + del combine_123 + + # pd_op.reshape: (-1x49x3x12x32xf32) <- (-1x49x1152xf32, 5xi64) + reshape_287 = paddle._C_ops.reshape(add_144, stack_121) + del add_144, stack_121 + + # pd_op.transpose: (3x-1x12x49x32xf32) <- (-1x49x3x12x32xf32) + transpose_126 = paddle._C_ops.transpose(reshape_287, [2, 0, 3, 1, 4]) + del reshape_287 + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_135 = paddle._C_ops.slice( + transpose_126, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_136 = paddle._C_ops.slice( + transpose_126, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_137 = paddle._C_ops.slice( + transpose_126, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_126 + + # pd_op.scale: (-1x12x49x32xf32) <- (-1x12x49x32xf32, 1xf32) + scale_19 = paddle._C_ops.scale(slice_135, full_7, float("0"), True) + del slice_135 + + # pd_op.transpose: (-1x12x32x49xf32) <- (-1x12x49x32xf32) + transpose_127 = paddle._C_ops.transpose(slice_136, [0, 1, 3, 2]) + del slice_136 + + # pd_op.matmul: (-1x12x49x49xf32) <- (-1x12x49x32xf32, -1x12x32x49xf32) + matmul_117 = paddle._C_ops.matmul(scale_19, transpose_127, False, False) + del scale_19, transpose_127 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_288 = paddle._C_ops.reshape(data_39, full_int_array_7) + del data_39 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_19 = paddle._C_ops.index_select(data_40, reshape_288, 0) + del data_40, reshape_288 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_289 = paddle._C_ops.reshape(index_select_19, full_int_array_8) + del index_select_19 + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_128 = paddle._C_ops.transpose(reshape_289, [2, 0, 1]) + del reshape_289 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_57 = paddle._C_ops.unsqueeze(transpose_128, full_int_array_0) + del transpose_128 + + # pd_op.add: (-1x12x49x49xf32) <- (-1x12x49x49xf32, 1x12x49x49xf32) + add_145 = paddle._C_ops.add(matmul_117, unsqueeze_57) + del matmul_117, unsqueeze_57 + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_9 = paddle._C_ops.floor_divide(slice_134, full_36) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_124 = [floor_divide_9, full_20, full_31, full_4, full_4] + del floor_divide_9 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_122 = paddle._C_ops.stack(combine_124, 0) + del combine_124 + + # pd_op.reshape: (-1x4x12x49x49xf32) <- (-1x12x49x49xf32, 5xi64) + reshape_290 = paddle._C_ops.reshape(add_145, stack_122) + del add_145, stack_122 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_58 = paddle._C_ops.unsqueeze(where_19, full_int_array_1) + del where_19 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_59 = paddle._C_ops.unsqueeze(unsqueeze_58, full_int_array_0) + del unsqueeze_58 + + # pd_op.add: (-1x4x12x49x49xf32) <- (-1x4x12x49x49xf32, 1x4x1x49x49xf32) + add_146 = paddle._C_ops.add(reshape_290, unsqueeze_59) + del reshape_290, unsqueeze_59 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_125 = [slice_134, full_31, full_4, full_4] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_123 = paddle._C_ops.stack(combine_125, 0) + del combine_125 + + # pd_op.reshape: (-1x12x49x49xf32) <- (-1x4x12x49x49xf32, 4xi64) + reshape_291 = paddle._C_ops.reshape(add_146, stack_123) + del add_146, stack_123 + + # pd_op.softmax: (-1x12x49x49xf32) <- (-1x12x49x49xf32) + softmax_19 = paddle._C_ops.softmax(reshape_291, -1) + del reshape_291 + + # pd_op.matmul: (-1x12x49x32xf32) <- (-1x12x49x49xf32, -1x12x49x32xf32) + matmul_118 = paddle._C_ops.matmul(softmax_19, slice_137, False, False) + del slice_137, softmax_19 + + # pd_op.transpose: (-1x49x12x32xf32) <- (-1x12x49x32xf32) + transpose_129 = paddle._C_ops.transpose(matmul_118, [0, 2, 1, 3]) + del matmul_118 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_126 = [slice_134, full_4, full_17] + del slice_134 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_124 = paddle._C_ops.stack(combine_126, 0) + del combine_126 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x49x12x32xf32, 3xi64) + reshape_292 = paddle._C_ops.reshape(transpose_129, stack_124) + del stack_124, transpose_129 + + # pd_op.matmul: (-1x49x384xf32) <- (-1x49x384xf32, 384x384xf32) + matmul_119 = paddle._C_ops.matmul(reshape_292, parameter_62, False, False) + del parameter_62, reshape_292 + + # pd_op.add: (-1x49x384xf32) <- (-1x49x384xf32, 384xf32) + add_147 = paddle._C_ops.add(matmul_119, parameter_61) + del matmul_119, parameter_61 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x49x384xf32, 4xi64) + reshape_293 = paddle._C_ops.reshape(add_147, full_int_array_38) + del add_147 + + # pd_op.reshape: (-1x2x2x7x7x384xf32) <- (-1x7x7x384xf32, 6xi64) + reshape_294 = paddle._C_ops.reshape(reshape_293, full_int_array_40) + del reshape_293 + + # pd_op.transpose: (-1x2x7x2x7x384xf32) <- (-1x2x2x7x7x384xf32) + transpose_130 = paddle._C_ops.transpose(reshape_294, [0, 1, 3, 2, 4, 5]) + del reshape_294 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x2x7x2x7x384xf32, 4xi64) + reshape_295 = paddle._C_ops.reshape(transpose_130, full_int_array_41) + del transpose_130 + + # pd_op.roll: (-1x14x14x384xf32) <- (-1x14x14x384xf32, 2xi64) + roll_19 = paddle._C_ops.roll(reshape_295, full_int_array_29, [1, 2]) + del reshape_295 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_127 = [slice_131, full_32, full_17] + del slice_131 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_125 = paddle._C_ops.stack(combine_127, 0) + del combine_127 + + # pd_op.reshape: (-1x196x384xf32) <- (-1x14x14x384xf32, 3xi64) + reshape_296 = paddle._C_ops.reshape(roll_19, stack_125) + del roll_19, stack_125 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_148 = paddle._C_ops.add(add_143, reshape_296) + del add_143, reshape_296 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_126, layer_norm_127, layer_norm_128 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_148, parameter_60, parameter_59, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_59, parameter_60 + + # pd_op.matmul: (-1x196x1536xf32) <- (-1x196x384xf32, 384x1536xf32) + matmul_120 = paddle._C_ops.matmul(layer_norm_126, parameter_58, False, False) + del layer_norm_126, parameter_58 + + # pd_op.add: (-1x196x1536xf32) <- (-1x196x1536xf32, 1536xf32) + add_149 = paddle._C_ops.add(matmul_120, parameter_57) + del matmul_120, parameter_57 + + # pd_op.gelu: (-1x196x1536xf32) <- (-1x196x1536xf32) + gelu_19 = paddle._C_ops.gelu(add_149, False) + del add_149 + + # pd_op.matmul: (-1x196x384xf32) <- (-1x196x1536xf32, 1536x384xf32) + matmul_121 = paddle._C_ops.matmul(gelu_19, parameter_56, False, False) + del gelu_19, parameter_56 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, 384xf32) + add_150 = paddle._C_ops.add(matmul_121, parameter_55) + del matmul_121, parameter_55 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_151 = paddle._C_ops.add(add_148, add_150) + del add_148, add_150 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_78 = paddle._C_ops.shape64(add_151) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_138 = paddle._C_ops.slice( + shape64_78, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_78 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_129, layer_norm_130, layer_norm_131 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_151, parameter_54, parameter_53, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_53, parameter_54 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_128 = [slice_138, full_29, full_29, full_17] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_126 = paddle._C_ops.stack(combine_128, 0) + del combine_128 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x196x384xf32, 4xi64) + reshape_297 = paddle._C_ops.reshape(layer_norm_129, stack_126) + del layer_norm_129, stack_126 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_79 = paddle._C_ops.shape64(reshape_297) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_139 = paddle._C_ops.slice( + shape64_79, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_79 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_129 = [slice_139, full_30, full_3, full_30, full_3, full_17] + del slice_139 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_127 = paddle._C_ops.stack(combine_129, 0) + del combine_129 + + # pd_op.reshape: (-1x2x7x2x7x384xf32) <- (-1x14x14x384xf32, 6xi64) + reshape_298 = paddle._C_ops.reshape(reshape_297, stack_127) + del reshape_297, stack_127 + + # pd_op.transpose: (-1x2x2x7x7x384xf32) <- (-1x2x7x2x7x384xf32) + transpose_131 = paddle._C_ops.transpose(reshape_298, [0, 1, 3, 2, 4, 5]) + del reshape_298 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x2x2x7x7x384xf32, 4xi64) + reshape_299 = paddle._C_ops.reshape(transpose_131, full_int_array_38) + del transpose_131 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x7x7x384xf32, 3xi64) + reshape_300 = paddle._C_ops.reshape(reshape_299, full_int_array_39) + del reshape_299 + + # pd_op.shape64: (3xi64) <- (-1x49x384xf32) + shape64_80 = paddle._C_ops.shape64(reshape_300) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_140 = paddle._C_ops.slice( + shape64_80, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_80 + + # pd_op.matmul: (-1x49x1152xf32) <- (-1x49x384xf32, 384x1152xf32) + matmul_122 = paddle._C_ops.matmul(reshape_300, parameter_52, False, False) + del parameter_52, reshape_300 + + # pd_op.add: (-1x49x1152xf32) <- (-1x49x1152xf32, 1152xf32) + add_152 = paddle._C_ops.add(matmul_122, parameter_51) + del matmul_122, parameter_51 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_130 = [slice_140, full_4, full_5, full_31, full_6] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_128 = paddle._C_ops.stack(combine_130, 0) + del combine_130 + + # pd_op.reshape: (-1x49x3x12x32xf32) <- (-1x49x1152xf32, 5xi64) + reshape_301 = paddle._C_ops.reshape(add_152, stack_128) + del add_152, stack_128 + + # pd_op.transpose: (3x-1x12x49x32xf32) <- (-1x49x3x12x32xf32) + transpose_132 = paddle._C_ops.transpose(reshape_301, [2, 0, 3, 1, 4]) + del reshape_301 + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_141 = paddle._C_ops.slice( + transpose_132, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_142 = paddle._C_ops.slice( + transpose_132, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_143 = paddle._C_ops.slice( + transpose_132, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_132 + + # pd_op.scale: (-1x12x49x32xf32) <- (-1x12x49x32xf32, 1xf32) + scale_20 = paddle._C_ops.scale(slice_141, full_7, float("0"), True) + del slice_141 + + # pd_op.transpose: (-1x12x32x49xf32) <- (-1x12x49x32xf32) + transpose_133 = paddle._C_ops.transpose(slice_142, [0, 1, 3, 2]) + del slice_142 + + # pd_op.matmul: (-1x12x49x49xf32) <- (-1x12x49x32xf32, -1x12x32x49xf32) + matmul_123 = paddle._C_ops.matmul(scale_20, transpose_133, False, False) + del scale_20, transpose_133 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_302 = paddle._C_ops.reshape(data_41, full_int_array_7) + del data_41 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_20 = paddle._C_ops.index_select(data_42, reshape_302, 0) + del data_42, reshape_302 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_303 = paddle._C_ops.reshape(index_select_20, full_int_array_8) + del index_select_20 + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_134 = paddle._C_ops.transpose(reshape_303, [2, 0, 1]) + del reshape_303 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_60 = paddle._C_ops.unsqueeze(transpose_134, full_int_array_0) + del transpose_134 + + # pd_op.add: (-1x12x49x49xf32) <- (-1x12x49x49xf32, 1x12x49x49xf32) + add_153 = paddle._C_ops.add(matmul_123, unsqueeze_60) + del matmul_123, unsqueeze_60 + + # pd_op.softmax: (-1x12x49x49xf32) <- (-1x12x49x49xf32) + softmax_20 = paddle._C_ops.softmax(add_153, -1) + del add_153 + + # pd_op.matmul: (-1x12x49x32xf32) <- (-1x12x49x49xf32, -1x12x49x32xf32) + matmul_124 = paddle._C_ops.matmul(softmax_20, slice_143, False, False) + del slice_143, softmax_20 + + # pd_op.transpose: (-1x49x12x32xf32) <- (-1x12x49x32xf32) + transpose_135 = paddle._C_ops.transpose(matmul_124, [0, 2, 1, 3]) + del matmul_124 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_131 = [slice_140, full_4, full_17] + del slice_140 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_129 = paddle._C_ops.stack(combine_131, 0) + del combine_131 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x49x12x32xf32, 3xi64) + reshape_304 = paddle._C_ops.reshape(transpose_135, stack_129) + del stack_129, transpose_135 + + # pd_op.matmul: (-1x49x384xf32) <- (-1x49x384xf32, 384x384xf32) + matmul_125 = paddle._C_ops.matmul(reshape_304, parameter_50, False, False) + del parameter_50, reshape_304 + + # pd_op.add: (-1x49x384xf32) <- (-1x49x384xf32, 384xf32) + add_154 = paddle._C_ops.add(matmul_125, parameter_49) + del matmul_125, parameter_49 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x49x384xf32, 4xi64) + reshape_305 = paddle._C_ops.reshape(add_154, full_int_array_38) + del add_154 + + # pd_op.reshape: (-1x2x2x7x7x384xf32) <- (-1x7x7x384xf32, 6xi64) + reshape_306 = paddle._C_ops.reshape(reshape_305, full_int_array_40) + del reshape_305 + + # pd_op.transpose: (-1x2x7x2x7x384xf32) <- (-1x2x2x7x7x384xf32) + transpose_136 = paddle._C_ops.transpose(reshape_306, [0, 1, 3, 2, 4, 5]) + del reshape_306 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x2x7x2x7x384xf32, 4xi64) + reshape_307 = paddle._C_ops.reshape(transpose_136, full_int_array_41) + del transpose_136 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_132 = [slice_138, full_32, full_17] + del slice_138 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_130 = paddle._C_ops.stack(combine_132, 0) + del combine_132 + + # pd_op.reshape: (-1x196x384xf32) <- (-1x14x14x384xf32, 3xi64) + reshape_308 = paddle._C_ops.reshape(reshape_307, stack_130) + del reshape_307, stack_130 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_155 = paddle._C_ops.add(add_151, reshape_308) + del add_151, reshape_308 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_132, layer_norm_133, layer_norm_134 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_155, parameter_48, parameter_47, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_47, parameter_48 + + # pd_op.matmul: (-1x196x1536xf32) <- (-1x196x384xf32, 384x1536xf32) + matmul_126 = paddle._C_ops.matmul(layer_norm_132, parameter_46, False, False) + del layer_norm_132, parameter_46 + + # pd_op.add: (-1x196x1536xf32) <- (-1x196x1536xf32, 1536xf32) + add_156 = paddle._C_ops.add(matmul_126, parameter_45) + del matmul_126, parameter_45 + + # pd_op.gelu: (-1x196x1536xf32) <- (-1x196x1536xf32) + gelu_20 = paddle._C_ops.gelu(add_156, False) + del add_156 + + # pd_op.matmul: (-1x196x384xf32) <- (-1x196x1536xf32, 1536x384xf32) + matmul_127 = paddle._C_ops.matmul(gelu_20, parameter_44, False, False) + del gelu_20, parameter_44 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, 384xf32) + add_157 = paddle._C_ops.add(matmul_127, parameter_43) + del matmul_127, parameter_43 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_158 = paddle._C_ops.add(add_155, add_157) + del add_155, add_157 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_81 = paddle._C_ops.shape64(add_158) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_144 = paddle._C_ops.slice( + shape64_81, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_81 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_135, layer_norm_136, layer_norm_137 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_158, parameter_42, parameter_41, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_41, parameter_42 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_133 = [slice_144, full_29, full_29, full_17] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_131 = paddle._C_ops.stack(combine_133, 0) + del combine_133 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x196x384xf32, 4xi64) + reshape_309 = paddle._C_ops.reshape(layer_norm_135, stack_131) + del layer_norm_135, stack_131 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_82 = paddle._C_ops.shape64(reshape_309) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_145 = paddle._C_ops.slice( + shape64_82, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_82 + + # pd_op.roll: (-1x14x14x384xf32) <- (-1x14x14x384xf32, 2xi64) + roll_20 = paddle._C_ops.roll(reshape_309, full_int_array_11, [1, 2]) + del reshape_309 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_83 = paddle._C_ops.shape64(roll_20) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_146 = paddle._C_ops.slice( + shape64_83, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_83 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_134 = [slice_146, full_30, full_3, full_30, full_3, full_17] + del full_30, slice_146 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_132 = paddle._C_ops.stack(combine_134, 0) + del combine_134 + + # pd_op.reshape: (-1x2x7x2x7x384xf32) <- (-1x14x14x384xf32, 6xi64) + reshape_310 = paddle._C_ops.reshape(roll_20, stack_132) + del roll_20, stack_132 + + # pd_op.transpose: (-1x2x2x7x7x384xf32) <- (-1x2x7x2x7x384xf32) + transpose_137 = paddle._C_ops.transpose(reshape_310, [0, 1, 3, 2, 4, 5]) + del reshape_310 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x2x2x7x7x384xf32, 4xi64) + reshape_311 = paddle._C_ops.reshape(transpose_137, full_int_array_38) + del transpose_137 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x7x7x384xf32, 3xi64) + reshape_312 = paddle._C_ops.reshape(reshape_311, full_int_array_39) + del full_int_array_39, reshape_311 + + # pd_op.full: (1x14x14x1xf32) <- () + full_44 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__92 = paddle._C_ops.set_value_( + full_44, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_44 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__93 = paddle._C_ops.set_value_( + set_value__92, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__92 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__94 = paddle._C_ops.set_value_( + set_value__93, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__93 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__95 = paddle._C_ops.set_value_( + set_value__94, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__94 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__96 = paddle._C_ops.set_value_( + set_value__95, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__95 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__97 = paddle._C_ops.set_value_( + set_value__96, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__96 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__98 = paddle._C_ops.set_value_( + set_value__97, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__97 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__99 = paddle._C_ops.set_value_( + set_value__98, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__98 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__10 = paddle._C_ops.set_value_( + set_value__99, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__99 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_313 = paddle._C_ops.reshape(set_value__10, full_int_array_42) + del full_int_array_42 + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_138 = paddle._C_ops.transpose(reshape_313, [0, 1, 3, 2, 4, 5]) + del reshape_313 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_314 = paddle._C_ops.reshape(transpose_138, full_int_array_27) + del transpose_138 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_315 = paddle._C_ops.reshape(reshape_314, full_int_array_28) + del reshape_314 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_61 = paddle._C_ops.unsqueeze(reshape_315, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_62 = paddle._C_ops.unsqueeze(reshape_315, full_int_array_5) + del reshape_315 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_10 = paddle._C_ops.subtract(unsqueeze_61, unsqueeze_62) + del unsqueeze_61, unsqueeze_62 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_10 = paddle._C_ops.not_equal(subtract_10, full_10) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_20 = paddle._C_ops.where(not_equal_10, full_34, subtract_10) + del full_34, not_equal_10, subtract_10 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_10 = paddle._C_ops.equal(where_20, full_10) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_21 = paddle._C_ops.where(equal_10, full_35, where_20) + del equal_10, full_35, where_20 + + # pd_op.shape64: (3xi64) <- (-1x49x384xf32) + shape64_84 = paddle._C_ops.shape64(reshape_312) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_147 = paddle._C_ops.slice( + shape64_84, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_84 + + # pd_op.matmul: (-1x49x1152xf32) <- (-1x49x384xf32, 384x1152xf32) + matmul_128 = paddle._C_ops.matmul(reshape_312, parameter_40, False, False) + del parameter_40, reshape_312 + + # pd_op.add: (-1x49x1152xf32) <- (-1x49x1152xf32, 1152xf32) + add_159 = paddle._C_ops.add(matmul_128, parameter_39) + del matmul_128, parameter_39 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_135 = [slice_147, full_4, full_5, full_31, full_6] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_133 = paddle._C_ops.stack(combine_135, 0) + del combine_135 + + # pd_op.reshape: (-1x49x3x12x32xf32) <- (-1x49x1152xf32, 5xi64) + reshape_316 = paddle._C_ops.reshape(add_159, stack_133) + del add_159, stack_133 + + # pd_op.transpose: (3x-1x12x49x32xf32) <- (-1x49x3x12x32xf32) + transpose_139 = paddle._C_ops.transpose(reshape_316, [2, 0, 3, 1, 4]) + del reshape_316 + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_148 = paddle._C_ops.slice( + transpose_139, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_149 = paddle._C_ops.slice( + transpose_139, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_150 = paddle._C_ops.slice( + transpose_139, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_139 + + # pd_op.scale: (-1x12x49x32xf32) <- (-1x12x49x32xf32, 1xf32) + scale_21 = paddle._C_ops.scale(slice_148, full_7, float("0"), True) + del slice_148 + + # pd_op.transpose: (-1x12x32x49xf32) <- (-1x12x49x32xf32) + transpose_140 = paddle._C_ops.transpose(slice_149, [0, 1, 3, 2]) + del slice_149 + + # pd_op.matmul: (-1x12x49x49xf32) <- (-1x12x49x32xf32, -1x12x32x49xf32) + matmul_129 = paddle._C_ops.matmul(scale_21, transpose_140, False, False) + del scale_21, transpose_140 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_317 = paddle._C_ops.reshape(data_43, full_int_array_7) + del data_43 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_21 = paddle._C_ops.index_select(data_44, reshape_317, 0) + del data_44, reshape_317 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_318 = paddle._C_ops.reshape(index_select_21, full_int_array_8) + del index_select_21 + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_141 = paddle._C_ops.transpose(reshape_318, [2, 0, 1]) + del reshape_318 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_63 = paddle._C_ops.unsqueeze(transpose_141, full_int_array_0) + del transpose_141 + + # pd_op.add: (-1x12x49x49xf32) <- (-1x12x49x49xf32, 1x12x49x49xf32) + add_160 = paddle._C_ops.add(matmul_129, unsqueeze_63) + del matmul_129, unsqueeze_63 + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_10 = paddle._C_ops.floor_divide(slice_147, full_36) + del full_36 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_136 = [floor_divide_10, full_20, full_31, full_4, full_4] + del floor_divide_10, full_20 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_134 = paddle._C_ops.stack(combine_136, 0) + del combine_136 + + # pd_op.reshape: (-1x4x12x49x49xf32) <- (-1x12x49x49xf32, 5xi64) + reshape_319 = paddle._C_ops.reshape(add_160, stack_134) + del add_160, stack_134 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_64 = paddle._C_ops.unsqueeze(where_21, full_int_array_1) + del where_21 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_65 = paddle._C_ops.unsqueeze(unsqueeze_64, full_int_array_0) + del unsqueeze_64 + + # pd_op.add: (-1x4x12x49x49xf32) <- (-1x4x12x49x49xf32, 1x4x1x49x49xf32) + add_161 = paddle._C_ops.add(reshape_319, unsqueeze_65) + del reshape_319, unsqueeze_65 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_137 = [slice_147, full_31, full_4, full_4] + del full_31 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_135 = paddle._C_ops.stack(combine_137, 0) + del combine_137 + + # pd_op.reshape: (-1x12x49x49xf32) <- (-1x4x12x49x49xf32, 4xi64) + reshape_320 = paddle._C_ops.reshape(add_161, stack_135) + del add_161, stack_135 + + # pd_op.softmax: (-1x12x49x49xf32) <- (-1x12x49x49xf32) + softmax_21 = paddle._C_ops.softmax(reshape_320, -1) + del reshape_320 + + # pd_op.matmul: (-1x12x49x32xf32) <- (-1x12x49x49xf32, -1x12x49x32xf32) + matmul_130 = paddle._C_ops.matmul(softmax_21, slice_150, False, False) + del slice_150, softmax_21 + + # pd_op.transpose: (-1x49x12x32xf32) <- (-1x12x49x32xf32) + transpose_142 = paddle._C_ops.transpose(matmul_130, [0, 2, 1, 3]) + del matmul_130 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_138 = [slice_147, full_4, full_17] + del slice_147 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_136 = paddle._C_ops.stack(combine_138, 0) + del combine_138 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x49x12x32xf32, 3xi64) + reshape_321 = paddle._C_ops.reshape(transpose_142, stack_136) + del stack_136, transpose_142 + + # pd_op.matmul: (-1x49x384xf32) <- (-1x49x384xf32, 384x384xf32) + matmul_131 = paddle._C_ops.matmul(reshape_321, parameter_38, False, False) + del parameter_38, reshape_321 + + # pd_op.add: (-1x49x384xf32) <- (-1x49x384xf32, 384xf32) + add_162 = paddle._C_ops.add(matmul_131, parameter_37) + del matmul_131, parameter_37 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x49x384xf32, 4xi64) + reshape_322 = paddle._C_ops.reshape(add_162, full_int_array_38) + del add_162, full_int_array_38 + + # pd_op.reshape: (-1x2x2x7x7x384xf32) <- (-1x7x7x384xf32, 6xi64) + reshape_323 = paddle._C_ops.reshape(reshape_322, full_int_array_40) + del full_int_array_40, reshape_322 + + # pd_op.transpose: (-1x2x7x2x7x384xf32) <- (-1x2x2x7x7x384xf32) + transpose_143 = paddle._C_ops.transpose(reshape_323, [0, 1, 3, 2, 4, 5]) + del reshape_323 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x2x7x2x7x384xf32, 4xi64) + reshape_324 = paddle._C_ops.reshape(transpose_143, full_int_array_41) + del full_int_array_41, transpose_143 + + # pd_op.roll: (-1x14x14x384xf32) <- (-1x14x14x384xf32, 2xi64) + roll_21 = paddle._C_ops.roll(reshape_324, full_int_array_29, [1, 2]) + del reshape_324 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_139 = [slice_144, full_32, full_17] + del full_32, slice_144 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_137 = paddle._C_ops.stack(combine_139, 0) + del combine_139 + + # pd_op.reshape: (-1x196x384xf32) <- (-1x14x14x384xf32, 3xi64) + reshape_325 = paddle._C_ops.reshape(roll_21, stack_137) + del roll_21, stack_137 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_163 = paddle._C_ops.add(add_158, reshape_325) + del add_158, reshape_325 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_138, layer_norm_139, layer_norm_140 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_163, parameter_36, parameter_35, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_35, parameter_36 + + # pd_op.matmul: (-1x196x1536xf32) <- (-1x196x384xf32, 384x1536xf32) + matmul_132 = paddle._C_ops.matmul(layer_norm_138, parameter_34, False, False) + del layer_norm_138, parameter_34 + + # pd_op.add: (-1x196x1536xf32) <- (-1x196x1536xf32, 1536xf32) + add_164 = paddle._C_ops.add(matmul_132, parameter_33) + del matmul_132, parameter_33 + + # pd_op.gelu: (-1x196x1536xf32) <- (-1x196x1536xf32) + gelu_21 = paddle._C_ops.gelu(add_164, False) + del add_164 + + # pd_op.matmul: (-1x196x384xf32) <- (-1x196x1536xf32, 1536x384xf32) + matmul_133 = paddle._C_ops.matmul(gelu_21, parameter_32, False, False) + del gelu_21, parameter_32 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, 384xf32) + add_165 = paddle._C_ops.add(matmul_133, parameter_31) + del matmul_133, parameter_31 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_166 = paddle._C_ops.add(add_163, add_165) + del add_163, add_165 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_85 = paddle._C_ops.shape64(add_166) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_151 = paddle._C_ops.slice( + shape64_85, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_85 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_140 = [slice_151, full_29, full_29, full_17] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_138 = paddle._C_ops.stack(combine_140, 0) + del combine_140 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x196x384xf32, 4xi64) + reshape_326 = paddle._C_ops.reshape(add_166, stack_138) + del add_166, stack_138 + + # pd_op.strided_slice: (-1x7x7x384xf32) <- (-1x14x14x384xf32, 2xi64, 2xi64, 2xi64) + strided_slice_8 = paddle._C_ops.strided_slice( + reshape_326, [1, 2], full_int_array_12, full_int_array_25, full_int_array_30 + ) + + # pd_op.strided_slice: (-1x7x7x384xf32) <- (-1x14x14x384xf32, 2xi64, 2xi64, 2xi64) + strided_slice_9 = paddle._C_ops.strided_slice( + reshape_326, [1, 2], full_int_array_31, full_int_array_25, full_int_array_30 + ) + del full_int_array_31 + + # pd_op.strided_slice: (-1x7x7x384xf32) <- (-1x14x14x384xf32, 2xi64, 2xi64, 2xi64) + strided_slice_10 = paddle._C_ops.strided_slice( + reshape_326, [1, 2], full_int_array_32, full_int_array_25, full_int_array_30 + ) + del full_int_array_32 + + # pd_op.strided_slice: (-1x7x7x384xf32) <- (-1x14x14x384xf32, 2xi64, 2xi64, 2xi64) + strided_slice_11 = paddle._C_ops.strided_slice( + reshape_326, [1, 2], full_int_array_14, full_int_array_25, full_int_array_30 + ) + del full_int_array_30 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_86 = paddle._C_ops.shape64(reshape_326) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_152 = paddle._C_ops.slice( + shape64_86, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_86 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_141 = [slice_152, full_29, full_29, full_17] + del full_17, full_29, slice_152 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_139 = paddle._C_ops.stack(combine_141, 0) + del combine_141 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x14x14x384xf32, 4xi64) + reshape_327 = paddle._C_ops.reshape(reshape_326, stack_139) + del reshape_326, stack_139 + + # builtin.combine: ([-1x7x7x384xf32, -1x7x7x384xf32, -1x7x7x384xf32, -1x7x7x384xf32]) <- (-1x7x7x384xf32, -1x7x7x384xf32, -1x7x7x384xf32, -1x7x7x384xf32) + combine_142 = [ + strided_slice_8, + strided_slice_9, + strided_slice_10, + strided_slice_11, + ] + del strided_slice_10, strided_slice_11, strided_slice_8, strided_slice_9 + + # pd_op.concat: (-1x7x7x1536xf32) <- ([-1x7x7x384xf32, -1x7x7x384xf32, -1x7x7x384xf32, -1x7x7x384xf32], 1xi32) + concat_2 = paddle._C_ops.concat(combine_142, full_15) + del combine_142, full_15 + + # pd_op.full: (xi64) <- () + full_45 = paddle._C_ops.full( + [], float("1536"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_143 = [slice_151, full_16, full_45] + del full_16, full_45, slice_151 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_140 = paddle._C_ops.stack(combine_143, 0) + del combine_143 + + # pd_op.reshape: (-1x-1x1536xf32) <- (-1x7x7x1536xf32, 3xi64) + reshape_328 = paddle._C_ops.reshape(concat_2, stack_140) + del concat_2, stack_140 + + # pd_op.layer_norm: (-1x-1x1536xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x1536xf32, 1536xf32, 1536xf32) + layer_norm_141, layer_norm_142, layer_norm_143 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + reshape_328, parameter_30, parameter_29, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_29, parameter_30, reshape_328 + + # pd_op.matmul: (-1x-1x768xf32) <- (-1x-1x1536xf32, 1536x768xf32) + matmul_134 = paddle._C_ops.matmul(layer_norm_141, parameter_28, False, False) + del layer_norm_141, parameter_28 + + # pd_op.shape64: (3xi64) <- (-1x-1x768xf32) + shape64_87 = paddle._C_ops.shape64(matmul_134) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_153 = paddle._C_ops.slice( + shape64_87, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_87 + + # pd_op.shape64: (3xi64) <- (-1x-1x768xf32) + shape64_88 = paddle._C_ops.shape64(matmul_134) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_154 = paddle._C_ops.slice( + shape64_88, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + del shape64_88 + + # pd_op.layer_norm: (-1x-1x768xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x768xf32, 768xf32, 768xf32) + layer_norm_144, layer_norm_145, layer_norm_146 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + matmul_134, parameter_27, parameter_26, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_26, parameter_27 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_144 = [slice_153, full_3, full_3, full_28] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_141 = paddle._C_ops.stack(combine_144, 0) + del combine_144 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x-1x768xf32, 4xi64) + reshape_329 = paddle._C_ops.reshape(layer_norm_144, stack_141) + del layer_norm_144, stack_141 + + # pd_op.shape64: (4xi64) <- (-1x7x7x768xf32) + shape64_89 = paddle._C_ops.shape64(reshape_329) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_155 = paddle._C_ops.slice( + shape64_89, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_89 + + # pd_op.full: (xi64) <- () + full_46 = paddle._C_ops.full( + [], float("1"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_145 = [slice_155, full_46, full_3, full_46, full_3, full_28] + del slice_155 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_142 = paddle._C_ops.stack(combine_145, 0) + del combine_145 + + # pd_op.reshape: (-1x1x7x1x7x768xf32) <- (-1x7x7x768xf32, 6xi64) + reshape_330 = paddle._C_ops.reshape(reshape_329, stack_142) + del reshape_329, stack_142 + + # pd_op.transpose: (-1x1x1x7x7x768xf32) <- (-1x1x7x1x7x768xf32) + transpose_144 = paddle._C_ops.transpose(reshape_330, [0, 1, 3, 2, 4, 5]) + del reshape_330 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_43 = [-1, 7, 7, 768] + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x1x1x7x7x768xf32, 4xi64) + reshape_331 = paddle._C_ops.reshape(transpose_144, full_int_array_43) + del transpose_144 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_44 = [-1, 49, 768] + + # pd_op.reshape: (-1x49x768xf32) <- (-1x7x7x768xf32, 3xi64) + reshape_332 = paddle._C_ops.reshape(reshape_331, full_int_array_44) + del reshape_331 + + # pd_op.shape64: (3xi64) <- (-1x49x768xf32) + shape64_90 = paddle._C_ops.shape64(reshape_332) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_156 = paddle._C_ops.slice( + shape64_90, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_90 + + # pd_op.matmul: (-1x49x2304xf32) <- (-1x49x768xf32, 768x2304xf32) + matmul_135 = paddle._C_ops.matmul(reshape_332, parameter_25, False, False) + del parameter_25, reshape_332 + + # pd_op.add: (-1x49x2304xf32) <- (-1x49x2304xf32, 2304xf32) + add_167 = paddle._C_ops.add(matmul_135, parameter_24) + del matmul_135, parameter_24 + + # pd_op.full: (xi64) <- () + full_47 = paddle._C_ops.full( + [], float("24"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_146 = [slice_156, full_4, full_5, full_47, full_6] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_143 = paddle._C_ops.stack(combine_146, 0) + del combine_146 + + # pd_op.reshape: (-1x49x3x24x32xf32) <- (-1x49x2304xf32, 5xi64) + reshape_333 = paddle._C_ops.reshape(add_167, stack_143) + del add_167, stack_143 + + # pd_op.transpose: (3x-1x24x49x32xf32) <- (-1x49x3x24x32xf32) + transpose_145 = paddle._C_ops.transpose(reshape_333, [2, 0, 3, 1, 4]) + del reshape_333 + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_157 = paddle._C_ops.slice( + transpose_145, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_158 = paddle._C_ops.slice( + transpose_145, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_159 = paddle._C_ops.slice( + transpose_145, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_145 + + # pd_op.scale: (-1x24x49x32xf32) <- (-1x24x49x32xf32, 1xf32) + scale_22 = paddle._C_ops.scale(slice_157, full_7, float("0"), True) + del slice_157 + + # pd_op.transpose: (-1x24x32x49xf32) <- (-1x24x49x32xf32) + transpose_146 = paddle._C_ops.transpose(slice_158, [0, 1, 3, 2]) + del slice_158 + + # pd_op.matmul: (-1x24x49x49xf32) <- (-1x24x49x32xf32, -1x24x32x49xf32) + matmul_136 = paddle._C_ops.matmul(scale_22, transpose_146, False, False) + del scale_22, transpose_146 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_334 = paddle._C_ops.reshape(data_45, full_int_array_7) + del data_45 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_22 = paddle._C_ops.index_select(data_46, reshape_334, 0) + del data_46, reshape_334 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_335 = paddle._C_ops.reshape(index_select_22, full_int_array_8) + del index_select_22 + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_147 = paddle._C_ops.transpose(reshape_335, [2, 0, 1]) + del reshape_335 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_66 = paddle._C_ops.unsqueeze(transpose_147, full_int_array_0) + del transpose_147 + + # pd_op.add: (-1x24x49x49xf32) <- (-1x24x49x49xf32, 1x24x49x49xf32) + add_168 = paddle._C_ops.add(matmul_136, unsqueeze_66) + del matmul_136, unsqueeze_66 + + # pd_op.softmax: (-1x24x49x49xf32) <- (-1x24x49x49xf32) + softmax_22 = paddle._C_ops.softmax(add_168, -1) + del add_168 + + # pd_op.matmul: (-1x24x49x32xf32) <- (-1x24x49x49xf32, -1x24x49x32xf32) + matmul_137 = paddle._C_ops.matmul(softmax_22, slice_159, False, False) + del slice_159, softmax_22 + + # pd_op.transpose: (-1x49x24x32xf32) <- (-1x24x49x32xf32) + transpose_148 = paddle._C_ops.transpose(matmul_137, [0, 2, 1, 3]) + del matmul_137 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_147 = [slice_156, full_4, full_28] + del slice_156 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_144 = paddle._C_ops.stack(combine_147, 0) + del combine_147 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x49x24x32xf32, 3xi64) + reshape_336 = paddle._C_ops.reshape(transpose_148, stack_144) + del stack_144, transpose_148 + + # pd_op.matmul: (-1x49x768xf32) <- (-1x49x768xf32, 768x768xf32) + matmul_138 = paddle._C_ops.matmul(reshape_336, parameter_23, False, False) + del parameter_23, reshape_336 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, 768xf32) + add_169 = paddle._C_ops.add(matmul_138, parameter_22) + del matmul_138, parameter_22 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x49x768xf32, 4xi64) + reshape_337 = paddle._C_ops.reshape(add_169, full_int_array_43) + del add_169 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_45 = [-1, 1, 1, 7, 7, 768] + + # pd_op.reshape: (-1x1x1x7x7x768xf32) <- (-1x7x7x768xf32, 6xi64) + reshape_338 = paddle._C_ops.reshape(reshape_337, full_int_array_45) + del reshape_337 + + # pd_op.transpose: (-1x1x7x1x7x768xf32) <- (-1x1x1x7x7x768xf32) + transpose_149 = paddle._C_ops.transpose(reshape_338, [0, 1, 3, 2, 4, 5]) + del reshape_338 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x1x7x1x7x768xf32, 4xi64) + reshape_339 = paddle._C_ops.reshape(transpose_149, full_int_array_43) + del transpose_149 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_148 = [slice_153, full_4, full_28] + del slice_153 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_145 = paddle._C_ops.stack(combine_148, 0) + del combine_148 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x7x7x768xf32, 3xi64) + reshape_340 = paddle._C_ops.reshape(reshape_339, stack_145) + del reshape_339, stack_145 + + # pd_op.add: (-1x49x768xf32) <- (-1x-1x768xf32, -1x49x768xf32) + add_170 = paddle._C_ops.add(matmul_134, reshape_340) + del matmul_134, reshape_340 + + # pd_op.layer_norm: (-1x49x768xf32, -1x49xf32, -1x49xf32) <- (-1x49x768xf32, 768xf32, 768xf32) + layer_norm_147, layer_norm_148, layer_norm_149 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_170, parameter_21, parameter_20, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_20, parameter_21 + + # pd_op.matmul: (-1x49x3072xf32) <- (-1x49x768xf32, 768x3072xf32) + matmul_139 = paddle._C_ops.matmul(layer_norm_147, parameter_19, False, False) + del layer_norm_147, parameter_19 + + # pd_op.add: (-1x49x3072xf32) <- (-1x49x3072xf32, 3072xf32) + add_171 = paddle._C_ops.add(matmul_139, parameter_18) + del matmul_139, parameter_18 + + # pd_op.gelu: (-1x49x3072xf32) <- (-1x49x3072xf32) + gelu_22 = paddle._C_ops.gelu(add_171, False) + del add_171 + + # pd_op.matmul: (-1x49x768xf32) <- (-1x49x3072xf32, 3072x768xf32) + matmul_140 = paddle._C_ops.matmul(gelu_22, parameter_17, False, False) + del gelu_22, parameter_17 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, 768xf32) + add_172 = paddle._C_ops.add(matmul_140, parameter_16) + del matmul_140, parameter_16 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, -1x49x768xf32) + add_173 = paddle._C_ops.add(add_170, add_172) + del add_170, add_172 + + # pd_op.shape64: (3xi64) <- (-1x49x768xf32) + shape64_91 = paddle._C_ops.shape64(add_173) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_160 = paddle._C_ops.slice( + shape64_91, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_91 + + # pd_op.layer_norm: (-1x49x768xf32, -1x49xf32, -1x49xf32) <- (-1x49x768xf32, 768xf32, 768xf32) + layer_norm_150, layer_norm_151, layer_norm_152 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_173, parameter_15, parameter_14, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_14, parameter_15 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_149 = [slice_160, full_3, full_3, full_28] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_146 = paddle._C_ops.stack(combine_149, 0) + del combine_149 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x49x768xf32, 4xi64) + reshape_341 = paddle._C_ops.reshape(layer_norm_150, stack_146) + del layer_norm_150, stack_146 + + # pd_op.shape64: (4xi64) <- (-1x7x7x768xf32) + shape64_92 = paddle._C_ops.shape64(reshape_341) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_161 = paddle._C_ops.slice( + shape64_92, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_92 + + # pd_op.roll: (-1x7x7x768xf32) <- (-1x7x7x768xf32, 2xi64) + roll_22 = paddle._C_ops.roll(reshape_341, full_int_array_11, [1, 2]) + del reshape_341 + + # pd_op.shape64: (4xi64) <- (-1x7x7x768xf32) + shape64_93 = paddle._C_ops.shape64(roll_22) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_162 = paddle._C_ops.slice( + shape64_93, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_93 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_150 = [slice_162, full_46, full_3, full_46, full_3, full_28] + del full_3, slice_162 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_147 = paddle._C_ops.stack(combine_150, 0) + del combine_150 + + # pd_op.reshape: (-1x1x7x1x7x768xf32) <- (-1x7x7x768xf32, 6xi64) + reshape_342 = paddle._C_ops.reshape(roll_22, stack_147) + del roll_22, stack_147 + + # pd_op.transpose: (-1x1x1x7x7x768xf32) <- (-1x1x7x1x7x768xf32) + transpose_150 = paddle._C_ops.transpose(reshape_342, [0, 1, 3, 2, 4, 5]) + del reshape_342 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x1x1x7x7x768xf32, 4xi64) + reshape_343 = paddle._C_ops.reshape(transpose_150, full_int_array_43) + del transpose_150 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x7x7x768xf32, 3xi64) + reshape_344 = paddle._C_ops.reshape(reshape_343, full_int_array_44) + del full_int_array_44, reshape_343 + + # pd_op.full: (1x7x7x1xf32) <- () + full_48 = paddle._C_ops.full( + [1, 7, 7, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__100 = paddle._C_ops.set_value_( + full_48, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_48, full_int_array_12 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__101 = paddle._C_ops.set_value_( + set_value__100, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del full_int_array_15, set_value__100 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__102 = paddle._C_ops.set_value_( + set_value__101, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del full_int_array_17, full_int_array_18, set_value__101 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__103 = paddle._C_ops.set_value_( + set_value__102, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del full_int_array_19, set_value__102 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__104 = paddle._C_ops.set_value_( + set_value__103, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del full_int_array_13, set_value__103 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__105 = paddle._C_ops.set_value_( + set_value__104, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del full_int_array_16, full_int_array_21, set_value__104 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__106 = paddle._C_ops.set_value_( + set_value__105, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del full_int_array_22, full_int_array_23, set_value__105 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__107 = paddle._C_ops.set_value_( + set_value__106, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del full_int_array_20, full_int_array_24, set_value__106 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__11 = paddle._C_ops.set_value_( + set_value__107, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del full_int_array_11, full_int_array_25, set_value__107 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_46 = [1, 1, 7, 1, 7, 1] + + # pd_op.reshape: (1x1x7x1x7x1xf32) <- (1x7x7x1xf32, 6xi64) + reshape_345 = paddle._C_ops.reshape(set_value__11, full_int_array_46) + del full_int_array_46 + + # pd_op.transpose: (1x1x1x7x7x1xf32) <- (1x1x7x1x7x1xf32) + transpose_151 = paddle._C_ops.transpose(reshape_345, [0, 1, 3, 2, 4, 5]) + del reshape_345 + + # pd_op.reshape: (1x7x7x1xf32) <- (1x1x1x7x7x1xf32, 4xi64) + reshape_346 = paddle._C_ops.reshape(transpose_151, full_int_array_27) + del full_int_array_27, transpose_151 + + # pd_op.reshape: (1x49xf32) <- (1x7x7x1xf32, 2xi64) + reshape_347 = paddle._C_ops.reshape(reshape_346, full_int_array_28) + del full_int_array_28, reshape_346 + + # pd_op.unsqueeze: (1x1x49xf32) <- (1x49xf32, 1xi64) + unsqueeze_67 = paddle._C_ops.unsqueeze(reshape_347, full_int_array_1) + + # pd_op.unsqueeze: (1x49x1xf32) <- (1x49xf32, 1xi64) + unsqueeze_68 = paddle._C_ops.unsqueeze(reshape_347, full_int_array_5) + del reshape_347 + + # pd_op.subtract: (1x49x49xf32) <- (1x1x49xf32, 1x49x1xf32) + subtract_11 = paddle._C_ops.subtract(unsqueeze_67, unsqueeze_68) + del unsqueeze_67, unsqueeze_68 + + # pd_op.not_equal: (1x49x49xb) <- (1x49x49xf32, xf32) + not_equal_11 = paddle._C_ops.not_equal(subtract_11, full_10) + + # pd_op.full: (1x49x49xf32) <- () + full_49 = paddle._C_ops.full( + [1, 49, 49], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (1x49x49xf32) <- (1x49x49xb, 1x49x49xf32, 1x49x49xf32) + where_22 = paddle._C_ops.where(not_equal_11, full_49, subtract_11) + del full_49, not_equal_11, subtract_11 + + # pd_op.equal: (1x49x49xb) <- (1x49x49xf32, xf32) + equal_11 = paddle._C_ops.equal(where_22, full_10) + del full_10 + + # pd_op.full: (1x49x49xf32) <- () + full_50 = paddle._C_ops.full( + [1, 49, 49], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (1x49x49xf32) <- (1x49x49xb, 1x49x49xf32, 1x49x49xf32) + where_23 = paddle._C_ops.where(equal_11, full_50, where_22) + del equal_11, full_50, where_22 + + # pd_op.shape64: (3xi64) <- (-1x49x768xf32) + shape64_94 = paddle._C_ops.shape64(reshape_344) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_163 = paddle._C_ops.slice( + shape64_94, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_94 + + # pd_op.matmul: (-1x49x2304xf32) <- (-1x49x768xf32, 768x2304xf32) + matmul_141 = paddle._C_ops.matmul(reshape_344, parameter_13, False, False) + del parameter_13, reshape_344 + + # pd_op.add: (-1x49x2304xf32) <- (-1x49x2304xf32, 2304xf32) + add_174 = paddle._C_ops.add(matmul_141, parameter_12) + del matmul_141, parameter_12 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_151 = [slice_163, full_4, full_5, full_47, full_6] + del full_5, full_6 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_148 = paddle._C_ops.stack(combine_151, 0) + del combine_151 + + # pd_op.reshape: (-1x49x3x24x32xf32) <- (-1x49x2304xf32, 5xi64) + reshape_348 = paddle._C_ops.reshape(add_174, stack_148) + del add_174, stack_148 + + # pd_op.transpose: (3x-1x24x49x32xf32) <- (-1x49x3x24x32xf32) + transpose_152 = paddle._C_ops.transpose(reshape_348, [2, 0, 3, 1, 4]) + del reshape_348 + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_164 = paddle._C_ops.slice( + transpose_152, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_165 = paddle._C_ops.slice( + transpose_152, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_166 = paddle._C_ops.slice( + transpose_152, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del full_int_array_6, transpose_152 + + # pd_op.scale: (-1x24x49x32xf32) <- (-1x24x49x32xf32, 1xf32) + scale_23 = paddle._C_ops.scale(slice_164, full_7, float("0"), True) + del full_7, slice_164 + + # pd_op.transpose: (-1x24x32x49xf32) <- (-1x24x49x32xf32) + transpose_153 = paddle._C_ops.transpose(slice_165, [0, 1, 3, 2]) + del slice_165 + + # pd_op.matmul: (-1x24x49x49xf32) <- (-1x24x49x32xf32, -1x24x32x49xf32) + matmul_142 = paddle._C_ops.matmul(scale_23, transpose_153, False, False) + del scale_23, transpose_153 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_349 = paddle._C_ops.reshape(data_47, full_int_array_7) + del data_47, full_int_array_7 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_23 = paddle._C_ops.index_select(data_48, reshape_349, 0) + del data_48, reshape_349 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_350 = paddle._C_ops.reshape(index_select_23, full_int_array_8) + del full_int_array_8, index_select_23 + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_154 = paddle._C_ops.transpose(reshape_350, [2, 0, 1]) + del reshape_350 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_69 = paddle._C_ops.unsqueeze(transpose_154, full_int_array_0) + del transpose_154 + + # pd_op.add: (-1x24x49x49xf32) <- (-1x24x49x49xf32, 1x24x49x49xf32) + add_175 = paddle._C_ops.add(matmul_142, unsqueeze_69) + del matmul_142, unsqueeze_69 + + # pd_op.full: (xi64) <- () + full_51 = paddle._C_ops.full( + [], float("1"), paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_11 = paddle._C_ops.floor_divide(slice_163, full_51) + del full_51 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_152 = [floor_divide_11, full_46, full_47, full_4, full_4] + del floor_divide_11, full_46 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_149 = paddle._C_ops.stack(combine_152, 0) + del combine_152 + + # pd_op.reshape: (-1x1x24x49x49xf32) <- (-1x24x49x49xf32, 5xi64) + reshape_351 = paddle._C_ops.reshape(add_175, stack_149) + del add_175, stack_149 + + # pd_op.unsqueeze: (1x1x49x49xf32) <- (1x49x49xf32, 1xi64) + unsqueeze_70 = paddle._C_ops.unsqueeze(where_23, full_int_array_1) + del full_int_array_1, where_23 + + # pd_op.unsqueeze: (1x1x1x49x49xf32) <- (1x1x49x49xf32, 1xi64) + unsqueeze_71 = paddle._C_ops.unsqueeze(unsqueeze_70, full_int_array_0) + del full_int_array_0, unsqueeze_70 + + # pd_op.add: (-1x1x24x49x49xf32) <- (-1x1x24x49x49xf32, 1x1x1x49x49xf32) + add_176 = paddle._C_ops.add(reshape_351, unsqueeze_71) + del reshape_351, unsqueeze_71 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_153 = [slice_163, full_47, full_4, full_4] + del full_47 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_150 = paddle._C_ops.stack(combine_153, 0) + del combine_153 + + # pd_op.reshape: (-1x24x49x49xf32) <- (-1x1x24x49x49xf32, 4xi64) + reshape_352 = paddle._C_ops.reshape(add_176, stack_150) + del add_176, stack_150 + + # pd_op.softmax: (-1x24x49x49xf32) <- (-1x24x49x49xf32) + softmax_23 = paddle._C_ops.softmax(reshape_352, -1) + del reshape_352 + + # pd_op.matmul: (-1x24x49x32xf32) <- (-1x24x49x49xf32, -1x24x49x32xf32) + matmul_143 = paddle._C_ops.matmul(softmax_23, slice_166, False, False) + del slice_166, softmax_23 + + # pd_op.transpose: (-1x49x24x32xf32) <- (-1x24x49x32xf32) + transpose_155 = paddle._C_ops.transpose(matmul_143, [0, 2, 1, 3]) + del matmul_143 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_154 = [slice_163, full_4, full_28] + del slice_163 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_151 = paddle._C_ops.stack(combine_154, 0) + del combine_154 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x49x24x32xf32, 3xi64) + reshape_353 = paddle._C_ops.reshape(transpose_155, stack_151) + del stack_151, transpose_155 + + # pd_op.matmul: (-1x49x768xf32) <- (-1x49x768xf32, 768x768xf32) + matmul_144 = paddle._C_ops.matmul(reshape_353, parameter_11, False, False) + del parameter_11, reshape_353 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, 768xf32) + add_177 = paddle._C_ops.add(matmul_144, parameter_10) + del matmul_144, parameter_10 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x49x768xf32, 4xi64) + reshape_354 = paddle._C_ops.reshape(add_177, full_int_array_43) + del add_177 + + # pd_op.reshape: (-1x1x1x7x7x768xf32) <- (-1x7x7x768xf32, 6xi64) + reshape_355 = paddle._C_ops.reshape(reshape_354, full_int_array_45) + del full_int_array_45, reshape_354 + + # pd_op.transpose: (-1x1x7x1x7x768xf32) <- (-1x1x1x7x7x768xf32) + transpose_156 = paddle._C_ops.transpose(reshape_355, [0, 1, 3, 2, 4, 5]) + del reshape_355 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x1x7x1x7x768xf32, 4xi64) + reshape_356 = paddle._C_ops.reshape(transpose_156, full_int_array_43) + del full_int_array_43, transpose_156 + + # pd_op.roll: (-1x7x7x768xf32) <- (-1x7x7x768xf32, 2xi64) + roll_23 = paddle._C_ops.roll(reshape_356, full_int_array_29, [1, 2]) + del full_int_array_29, reshape_356 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_155 = [slice_160, full_4, full_28] + del full_28, full_4, slice_160 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_152 = paddle._C_ops.stack(combine_155, 0) + del combine_155 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x7x7x768xf32, 3xi64) + reshape_357 = paddle._C_ops.reshape(roll_23, stack_152) + del roll_23, stack_152 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, -1x49x768xf32) + add_178 = paddle._C_ops.add(add_173, reshape_357) + del add_173, reshape_357 + + # pd_op.layer_norm: (-1x49x768xf32, -1x49xf32, -1x49xf32) <- (-1x49x768xf32, 768xf32, 768xf32) + layer_norm_153, layer_norm_154, layer_norm_155 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_178, parameter_9, parameter_8, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_8, parameter_9 + + # pd_op.matmul: (-1x49x3072xf32) <- (-1x49x768xf32, 768x3072xf32) + matmul_145 = paddle._C_ops.matmul(layer_norm_153, parameter_7, False, False) + del layer_norm_153, parameter_7 + + # pd_op.add: (-1x49x3072xf32) <- (-1x49x3072xf32, 3072xf32) + add_179 = paddle._C_ops.add(matmul_145, parameter_6) + del matmul_145, parameter_6 + + # pd_op.gelu: (-1x49x3072xf32) <- (-1x49x3072xf32) + gelu_23 = paddle._C_ops.gelu(add_179, False) + del add_179 + + # pd_op.matmul: (-1x49x768xf32) <- (-1x49x3072xf32, 3072x768xf32) + matmul_146 = paddle._C_ops.matmul(gelu_23, parameter_5, False, False) + del gelu_23, parameter_5 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, 768xf32) + add_180 = paddle._C_ops.add(matmul_146, parameter_4) + del matmul_146, parameter_4 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, -1x49x768xf32) + add_181 = paddle._C_ops.add(add_178, add_180) + del add_178, add_180 + + # pd_op.layer_norm: (-1x49x768xf32, -1x49xf32, -1x49xf32) <- (-1x49x768xf32, 768xf32, 768xf32) + layer_norm_156, layer_norm_157, layer_norm_158 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_181, parameter_3, parameter_2, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_181, parameter_2, parameter_3 + + # pd_op.transpose: (-1x768x49xf32) <- (-1x49x768xf32) + transpose_157 = paddle._C_ops.transpose(layer_norm_156, [0, 2, 1]) + del layer_norm_156 + + # pd_op.unsqueeze: (-1x768x1x49xf32) <- (-1x768x49xf32, 1xi64) + unsqueeze_72 = paddle._C_ops.unsqueeze(transpose_157, full_int_array_5) + del transpose_157 + + # pd_op.pool2d: (-1x768x1x1xf32) <- (-1x768x1x49xf32, 2xi64) + pool2d_0 = paddle._C_ops.pool2d( + unsqueeze_72, + full_int_array_14, + [1, 1], + [0, 0], + False, + True, + "NCHW", + "avg", + False, + True, + "EXPLICIT", + ) + del full_int_array_14, unsqueeze_72 + + # pd_op.squeeze: (-1x768x1xf32) <- (-1x768x1x1xf32, 1xi64) + squeeze_0 = paddle._C_ops.squeeze(pool2d_0, full_int_array_5) + del full_int_array_5, pool2d_0 + + # pd_op.flatten: (-1x768xf32) <- (-1x768x1xf32) + flatten_1 = paddle._C_ops.flatten(squeeze_0, 1, 2) + del squeeze_0 + + # pd_op.matmul: (-1x102xf32) <- (-1x768xf32, 768x102xf32) + matmul_147 = paddle._C_ops.matmul(flatten_1, parameter_1, False, False) + del flatten_1, parameter_1 + + # pd_op.add: (-1x102xf32) <- (-1x102xf32, 102xf32) + add_0 = paddle._C_ops.add(matmul_147, parameter_0) + del matmul_147, parameter_0 + + return ( + add_0, + set_value__0, + set_value__1, + set_value__2, + set_value__3, + set_value__4, + set_value__5, + set_value__6, + set_value__7, + set_value__8, + set_value__9, + set_value__10, + set_value__11, + ) diff --git a/paddle_samples/vision-model/SwinTransformer_small_patch4_window7_224/subgraph_1/weight_meta.py b/paddle_samples/vision-model/SwinTransformer_small_patch4_window7_224/subgraph_1/weight_meta.py new file mode 100644 index 00000000..547d11be --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_small_patch4_window7_224/subgraph_1/weight_meta.py @@ -0,0 +1,2743 @@ +class Program_weight_tensor_parameter_0: + name = "parameter_0" + shape = [102] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_1: + name = "parameter_1" + shape = [768, 102] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_2: + name = "parameter_2" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_3: + name = "parameter_3" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_4: + name = "parameter_4" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_5: + name = "parameter_5" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_6: + name = "parameter_6" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_7: + name = "parameter_7" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_8: + name = "parameter_8" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_9: + name = "parameter_9" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_10: + name = "parameter_10" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_11: + name = "parameter_11" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_12: + name = "parameter_12" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_13: + name = "parameter_13" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_14: + name = "parameter_14" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_15: + name = "parameter_15" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_16: + name = "parameter_16" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_17: + name = "parameter_17" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_18: + name = "parameter_18" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_19: + name = "parameter_19" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_20: + name = "parameter_20" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_21: + name = "parameter_21" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_22: + name = "parameter_22" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_23: + name = "parameter_23" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_24: + name = "parameter_24" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_25: + name = "parameter_25" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_26: + name = "parameter_26" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_27: + name = "parameter_27" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_28: + name = "parameter_28" + shape = [1536, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_29: + name = "parameter_29" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_30: + name = "parameter_30" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_31: + name = "parameter_31" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_32: + name = "parameter_32" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_33: + name = "parameter_33" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_34: + name = "parameter_34" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_35: + name = "parameter_35" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_36: + name = "parameter_36" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_37: + name = "parameter_37" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_38: + name = "parameter_38" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_39: + name = "parameter_39" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_40: + name = "parameter_40" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_41: + name = "parameter_41" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_42: + name = "parameter_42" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_43: + name = "parameter_43" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_44: + name = "parameter_44" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_45: + name = "parameter_45" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_46: + name = "parameter_46" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_47: + name = "parameter_47" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_48: + name = "parameter_48" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_49: + name = "parameter_49" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_50: + name = "parameter_50" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_51: + name = "parameter_51" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_52: + name = "parameter_52" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_53: + name = "parameter_53" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_54: + name = "parameter_54" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_55: + name = "parameter_55" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_56: + name = "parameter_56" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_57: + name = "parameter_57" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_58: + name = "parameter_58" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_59: + name = "parameter_59" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_60: + name = "parameter_60" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_61: + name = "parameter_61" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_62: + name = "parameter_62" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_63: + name = "parameter_63" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_64: + name = "parameter_64" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_65: + name = "parameter_65" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_66: + name = "parameter_66" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_67: + name = "parameter_67" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_68: + name = "parameter_68" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_69: + name = "parameter_69" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_70: + name = "parameter_70" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_71: + name = "parameter_71" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_72: + name = "parameter_72" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_73: + name = "parameter_73" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_74: + name = "parameter_74" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_75: + name = "parameter_75" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_76: + name = "parameter_76" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_77: + name = "parameter_77" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_78: + name = "parameter_78" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_79: + name = "parameter_79" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_80: + name = "parameter_80" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_81: + name = "parameter_81" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_82: + name = "parameter_82" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_83: + name = "parameter_83" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_84: + name = "parameter_84" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_85: + name = "parameter_85" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_86: + name = "parameter_86" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_87: + name = "parameter_87" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_88: + name = "parameter_88" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_89: + name = "parameter_89" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_90: + name = "parameter_90" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_91: + name = "parameter_91" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_92: + name = "parameter_92" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_93: + name = "parameter_93" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_94: + name = "parameter_94" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_95: + name = "parameter_95" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_96: + name = "parameter_96" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_97: + name = "parameter_97" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_98: + name = "parameter_98" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_99: + name = "parameter_99" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_100: + name = "parameter_100" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_101: + name = "parameter_101" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_102: + name = "parameter_102" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_103: + name = "parameter_103" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_104: + name = "parameter_104" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_105: + name = "parameter_105" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_106: + name = "parameter_106" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_107: + name = "parameter_107" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_108: + name = "parameter_108" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_109: + name = "parameter_109" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_110: + name = "parameter_110" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_111: + name = "parameter_111" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_112: + name = "parameter_112" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_113: + name = "parameter_113" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_114: + name = "parameter_114" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_115: + name = "parameter_115" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_116: + name = "parameter_116" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_117: + name = "parameter_117" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_118: + name = "parameter_118" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_119: + name = "parameter_119" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_120: + name = "parameter_120" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_121: + name = "parameter_121" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_122: + name = "parameter_122" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_123: + name = "parameter_123" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_124: + name = "parameter_124" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_125: + name = "parameter_125" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_126: + name = "parameter_126" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_127: + name = "parameter_127" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_128: + name = "parameter_128" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_129: + name = "parameter_129" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_130: + name = "parameter_130" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_131: + name = "parameter_131" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_132: + name = "parameter_132" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_133: + name = "parameter_133" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_134: + name = "parameter_134" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_135: + name = "parameter_135" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_136: + name = "parameter_136" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_137: + name = "parameter_137" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_138: + name = "parameter_138" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_139: + name = "parameter_139" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_140: + name = "parameter_140" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_141: + name = "parameter_141" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_142: + name = "parameter_142" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_143: + name = "parameter_143" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_144: + name = "parameter_144" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_145: + name = "parameter_145" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_146: + name = "parameter_146" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_147: + name = "parameter_147" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_148: + name = "parameter_148" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_149: + name = "parameter_149" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_150: + name = "parameter_150" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_151: + name = "parameter_151" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_152: + name = "parameter_152" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_153: + name = "parameter_153" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_154: + name = "parameter_154" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_155: + name = "parameter_155" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_156: + name = "parameter_156" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_157: + name = "parameter_157" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_158: + name = "parameter_158" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_159: + name = "parameter_159" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_160: + name = "parameter_160" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_161: + name = "parameter_161" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_162: + name = "parameter_162" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_163: + name = "parameter_163" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_164: + name = "parameter_164" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_165: + name = "parameter_165" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_166: + name = "parameter_166" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_167: + name = "parameter_167" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_168: + name = "parameter_168" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_169: + name = "parameter_169" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_170: + name = "parameter_170" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_171: + name = "parameter_171" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_172: + name = "parameter_172" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_173: + name = "parameter_173" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_174: + name = "parameter_174" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_175: + name = "parameter_175" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_176: + name = "parameter_176" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_177: + name = "parameter_177" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_178: + name = "parameter_178" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_179: + name = "parameter_179" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_180: + name = "parameter_180" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_181: + name = "parameter_181" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_182: + name = "parameter_182" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_183: + name = "parameter_183" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_184: + name = "parameter_184" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_185: + name = "parameter_185" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_186: + name = "parameter_186" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_187: + name = "parameter_187" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_188: + name = "parameter_188" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_189: + name = "parameter_189" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_190: + name = "parameter_190" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_191: + name = "parameter_191" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_192: + name = "parameter_192" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_193: + name = "parameter_193" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_194: + name = "parameter_194" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_195: + name = "parameter_195" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_196: + name = "parameter_196" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_197: + name = "parameter_197" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_198: + name = "parameter_198" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_199: + name = "parameter_199" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_200: + name = "parameter_200" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_201: + name = "parameter_201" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_202: + name = "parameter_202" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_203: + name = "parameter_203" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_204: + name = "parameter_204" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_205: + name = "parameter_205" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_206: + name = "parameter_206" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_207: + name = "parameter_207" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_208: + name = "parameter_208" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_209: + name = "parameter_209" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_210: + name = "parameter_210" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_211: + name = "parameter_211" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_212: + name = "parameter_212" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_213: + name = "parameter_213" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_214: + name = "parameter_214" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_215: + name = "parameter_215" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_216: + name = "parameter_216" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_217: + name = "parameter_217" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_218: + name = "parameter_218" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_219: + name = "parameter_219" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_220: + name = "parameter_220" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_221: + name = "parameter_221" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_222: + name = "parameter_222" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_223: + name = "parameter_223" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_224: + name = "parameter_224" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_225: + name = "parameter_225" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_226: + name = "parameter_226" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_227: + name = "parameter_227" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_228: + name = "parameter_228" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_229: + name = "parameter_229" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_230: + name = "parameter_230" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_231: + name = "parameter_231" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_232: + name = "parameter_232" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_233: + name = "parameter_233" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_234: + name = "parameter_234" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_235: + name = "parameter_235" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_236: + name = "parameter_236" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_237: + name = "parameter_237" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_238: + name = "parameter_238" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_239: + name = "parameter_239" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_240: + name = "parameter_240" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_241: + name = "parameter_241" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_242: + name = "parameter_242" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_243: + name = "parameter_243" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_244: + name = "parameter_244" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_245: + name = "parameter_245" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_246: + name = "parameter_246" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_247: + name = "parameter_247" + shape = [768, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_248: + name = "parameter_248" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_249: + name = "parameter_249" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_250: + name = "parameter_250" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_251: + name = "parameter_251" + shape = [768, 192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_252: + name = "parameter_252" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_253: + name = "parameter_253" + shape = [192, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_254: + name = "parameter_254" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_255: + name = "parameter_255" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_256: + name = "parameter_256" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_257: + name = "parameter_257" + shape = [192, 192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_258: + name = "parameter_258" + shape = [576] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_259: + name = "parameter_259" + shape = [192, 576] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_260: + name = "parameter_260" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_261: + name = "parameter_261" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_262: + name = "parameter_262" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_263: + name = "parameter_263" + shape = [768, 192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_264: + name = "parameter_264" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_265: + name = "parameter_265" + shape = [192, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_266: + name = "parameter_266" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_267: + name = "parameter_267" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_268: + name = "parameter_268" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_269: + name = "parameter_269" + shape = [192, 192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_270: + name = "parameter_270" + shape = [576] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_271: + name = "parameter_271" + shape = [192, 576] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_272: + name = "parameter_272" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_273: + name = "parameter_273" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_274: + name = "parameter_274" + shape = [384, 192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_275: + name = "parameter_275" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_276: + name = "parameter_276" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_277: + name = "parameter_277" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_278: + name = "parameter_278" + shape = [384, 96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_279: + name = "parameter_279" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_280: + name = "parameter_280" + shape = [96, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_281: + name = "parameter_281" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_282: + name = "parameter_282" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_283: + name = "parameter_283" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_284: + name = "parameter_284" + shape = [96, 96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_285: + name = "parameter_285" + shape = [288] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_286: + name = "parameter_286" + shape = [96, 288] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_287: + name = "parameter_287" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_288: + name = "parameter_288" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_289: + name = "parameter_289" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_290: + name = "parameter_290" + shape = [384, 96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_291: + name = "parameter_291" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_292: + name = "parameter_292" + shape = [96, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_293: + name = "parameter_293" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_294: + name = "parameter_294" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_295: + name = "parameter_295" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_296: + name = "parameter_296" + shape = [96, 96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_297: + name = "parameter_297" + shape = [288] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_298: + name = "parameter_298" + shape = [96, 288] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_299: + name = "parameter_299" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_300: + name = "parameter_300" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_301: + name = "parameter_301" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_302: + name = "parameter_302" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_303: + name = "parameter_303" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_304: + name = "parameter_304" + shape = [96, 3, 4, 4] + dtype = "float32" + low = 0 + high = 0.5 + data = None diff --git a/paddle_samples/vision-model/SwinTransformer_small_patch4_window7_224/subgraph_2/graph_hash.txt b/paddle_samples/vision-model/SwinTransformer_small_patch4_window7_224/subgraph_2/graph_hash.txt new file mode 100644 index 00000000..678a18a1 --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_small_patch4_window7_224/subgraph_2/graph_hash.txt @@ -0,0 +1 @@ +b30ca9883649d991bc8c4ba04da935a9b4b73a36613bcc8529ef9a0a2fb8b1d3 \ No newline at end of file diff --git a/paddle_samples/vision-model/SwinTransformer_small_patch4_window7_224/subgraph_2/graph_net.json b/paddle_samples/vision-model/SwinTransformer_small_patch4_window7_224/subgraph_2/graph_net.json new file mode 100644 index 00000000..368ac6fc --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_small_patch4_window7_224/subgraph_2/graph_net.json @@ -0,0 +1,5 @@ +{ + "framework": "paddle", + "num_devices_required": 1, + "num_nodes_required": 1 +} \ No newline at end of file diff --git a/paddle_samples/vision-model/SwinTransformer_small_patch4_window7_224/subgraph_2/input_meta.py b/paddle_samples/vision-model/SwinTransformer_small_patch4_window7_224/subgraph_2/input_meta.py new file mode 100644 index 00000000..af3b5ec6 --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_small_patch4_window7_224/subgraph_2/input_meta.py @@ -0,0 +1,439 @@ +class Program_weight_tensor_data_0: + name = "data_0" + shape = [60, 3, 224, 224] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_1: + name = "data_1" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_2: + name = "data_2" + shape = [169, 3] + dtype = "float32" + low = -10.857 + high = 3.30842 + data = None + + +class Program_weight_tensor_data_3: + name = "data_3" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_4: + name = "data_4" + shape = [169, 3] + dtype = "float32" + low = -10.7209 + high = 3.22007 + data = None + + +class Program_weight_tensor_data_5: + name = "data_5" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_6: + name = "data_6" + shape = [169, 6] + dtype = "float32" + low = -4.58925 + high = 7.8892 + data = None + + +class Program_weight_tensor_data_7: + name = "data_7" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_8: + name = "data_8" + shape = [169, 6] + dtype = "float32" + low = -4.75612 + high = 5.78424 + data = None + + +class Program_weight_tensor_data_9: + name = "data_9" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_10: + name = "data_10" + shape = [169, 12] + dtype = "float32" + low = -8.44948 + high = 4.78259 + data = None + + +class Program_weight_tensor_data_11: + name = "data_11" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_12: + name = "data_12" + shape = [169, 12] + dtype = "float32" + low = -11.168 + high = 10.3589 + data = None + + +class Program_weight_tensor_data_13: + name = "data_13" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_14: + name = "data_14" + shape = [169, 12] + dtype = "float32" + low = -9.90827 + high = 4.33485 + data = None + + +class Program_weight_tensor_data_15: + name = "data_15" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_16: + name = "data_16" + shape = [169, 12] + dtype = "float32" + low = -8.18422 + high = 3.8872 + data = None + + +class Program_weight_tensor_data_17: + name = "data_17" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_18: + name = "data_18" + shape = [169, 12] + dtype = "float32" + low = -5.03777 + high = 3.21004 + data = None + + +class Program_weight_tensor_data_19: + name = "data_19" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_20: + name = "data_20" + shape = [169, 12] + dtype = "float32" + low = -6.8093 + high = 4.11759 + data = None + + +class Program_weight_tensor_data_21: + name = "data_21" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_22: + name = "data_22" + shape = [169, 12] + dtype = "float32" + low = -10.1307 + high = 4.64285 + data = None + + +class Program_weight_tensor_data_23: + name = "data_23" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_24: + name = "data_24" + shape = [169, 12] + dtype = "float32" + low = -5.80369 + high = 3.5646 + data = None + + +class Program_weight_tensor_data_25: + name = "data_25" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_26: + name = "data_26" + shape = [169, 12] + dtype = "float32" + low = -10.4222 + high = 2.90335 + data = None + + +class Program_weight_tensor_data_27: + name = "data_27" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_28: + name = "data_28" + shape = [169, 12] + dtype = "float32" + low = -8.23476 + high = 3.16348 + data = None + + +class Program_weight_tensor_data_29: + name = "data_29" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_30: + name = "data_30" + shape = [169, 12] + dtype = "float32" + low = -9.31923 + high = 3.1928 + data = None + + +class Program_weight_tensor_data_31: + name = "data_31" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_32: + name = "data_32" + shape = [169, 12] + dtype = "float32" + low = -4.71978 + high = 3.61097 + data = None + + +class Program_weight_tensor_data_33: + name = "data_33" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_34: + name = "data_34" + shape = [169, 12] + dtype = "float32" + low = -9.57184 + high = 2.48576 + data = None + + +class Program_weight_tensor_data_35: + name = "data_35" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_36: + name = "data_36" + shape = [169, 12] + dtype = "float32" + low = -5.8608 + high = 2.54084 + data = None + + +class Program_weight_tensor_data_37: + name = "data_37" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_38: + name = "data_38" + shape = [169, 12] + dtype = "float32" + low = -11.2029 + high = 1.8404 + data = None + + +class Program_weight_tensor_data_39: + name = "data_39" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_40: + name = "data_40" + shape = [169, 12] + dtype = "float32" + low = -11.4151 + high = 2.56611 + data = None + + +class Program_weight_tensor_data_41: + name = "data_41" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_42: + name = "data_42" + shape = [169, 12] + dtype = "float32" + low = -12.6664 + high = 1.54788 + data = None + + +class Program_weight_tensor_data_43: + name = "data_43" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_44: + name = "data_44" + shape = [169, 12] + dtype = "float32" + low = -6.02056 + high = 2.94447 + data = None + + +class Program_weight_tensor_data_45: + name = "data_45" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_46: + name = "data_46" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_47: + name = "data_47" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_48: + name = "data_48" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None diff --git a/paddle_samples/vision-model/SwinTransformer_small_patch4_window7_224/subgraph_2/model.py b/paddle_samples/vision-model/SwinTransformer_small_patch4_window7_224/subgraph_2/model.py new file mode 100644 index 00000000..55b93a58 --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_small_patch4_window7_224/subgraph_2/model.py @@ -0,0 +1,13786 @@ +import paddle + + +class GraphModule(paddle.nn.Layer): + def __init__(self): + super().__init__() + + def forward( + self, + parameter_0, + parameter_1, + parameter_2, + parameter_3, + parameter_4, + parameter_5, + parameter_6, + parameter_7, + parameter_8, + parameter_9, + parameter_10, + parameter_11, + parameter_12, + parameter_13, + parameter_14, + parameter_15, + parameter_16, + parameter_17, + parameter_18, + parameter_19, + parameter_20, + parameter_21, + parameter_22, + parameter_23, + parameter_24, + parameter_25, + parameter_26, + parameter_27, + parameter_28, + parameter_29, + parameter_30, + parameter_31, + parameter_32, + parameter_33, + parameter_34, + parameter_35, + parameter_36, + parameter_37, + parameter_38, + parameter_39, + parameter_40, + parameter_41, + parameter_42, + parameter_43, + parameter_44, + parameter_45, + parameter_46, + parameter_47, + parameter_48, + parameter_49, + parameter_50, + parameter_51, + parameter_52, + parameter_53, + parameter_54, + parameter_55, + parameter_56, + parameter_57, + parameter_58, + parameter_59, + parameter_60, + parameter_61, + parameter_62, + parameter_63, + parameter_64, + parameter_65, + parameter_66, + parameter_67, + parameter_68, + parameter_69, + parameter_70, + parameter_71, + parameter_72, + parameter_73, + parameter_74, + parameter_75, + parameter_76, + parameter_77, + parameter_78, + parameter_79, + parameter_80, + parameter_81, + parameter_82, + parameter_83, + parameter_84, + parameter_85, + parameter_86, + parameter_87, + parameter_88, + parameter_89, + parameter_90, + parameter_91, + parameter_92, + parameter_93, + parameter_94, + parameter_95, + parameter_96, + parameter_97, + parameter_98, + parameter_99, + parameter_100, + parameter_101, + parameter_102, + parameter_103, + parameter_104, + parameter_105, + parameter_106, + parameter_107, + parameter_108, + parameter_109, + parameter_110, + parameter_111, + parameter_112, + parameter_113, + parameter_114, + parameter_115, + parameter_116, + parameter_117, + parameter_118, + parameter_119, + parameter_120, + parameter_121, + parameter_122, + parameter_123, + parameter_124, + parameter_125, + parameter_126, + parameter_127, + parameter_128, + parameter_129, + parameter_130, + parameter_131, + parameter_132, + parameter_133, + parameter_134, + parameter_135, + parameter_136, + parameter_137, + parameter_138, + parameter_139, + parameter_140, + parameter_141, + parameter_142, + parameter_143, + parameter_144, + parameter_145, + parameter_146, + parameter_147, + parameter_148, + parameter_149, + parameter_150, + parameter_151, + parameter_152, + parameter_153, + parameter_154, + parameter_155, + parameter_156, + parameter_157, + parameter_158, + parameter_159, + parameter_160, + parameter_161, + parameter_162, + parameter_163, + parameter_164, + parameter_165, + parameter_166, + parameter_167, + parameter_168, + parameter_169, + parameter_170, + parameter_171, + parameter_172, + parameter_173, + parameter_174, + parameter_175, + parameter_176, + parameter_177, + parameter_178, + parameter_179, + parameter_180, + parameter_181, + parameter_182, + parameter_183, + parameter_184, + parameter_185, + parameter_186, + parameter_187, + parameter_188, + parameter_189, + parameter_190, + parameter_191, + parameter_192, + parameter_193, + parameter_194, + parameter_195, + parameter_196, + parameter_197, + parameter_198, + parameter_199, + parameter_200, + parameter_201, + parameter_202, + parameter_203, + parameter_204, + parameter_205, + parameter_206, + parameter_207, + parameter_208, + parameter_209, + parameter_210, + parameter_211, + parameter_212, + parameter_213, + parameter_214, + parameter_215, + parameter_216, + parameter_217, + parameter_218, + parameter_219, + parameter_220, + parameter_221, + parameter_222, + parameter_223, + parameter_224, + parameter_225, + parameter_226, + parameter_227, + parameter_228, + parameter_229, + parameter_230, + parameter_231, + parameter_232, + parameter_233, + parameter_234, + parameter_235, + parameter_236, + parameter_237, + parameter_238, + parameter_239, + parameter_240, + parameter_241, + parameter_242, + parameter_243, + parameter_244, + parameter_245, + parameter_246, + parameter_247, + parameter_248, + parameter_249, + parameter_250, + parameter_251, + parameter_252, + parameter_253, + parameter_254, + parameter_255, + parameter_256, + parameter_257, + parameter_258, + parameter_259, + parameter_260, + parameter_261, + parameter_262, + parameter_263, + parameter_264, + parameter_265, + parameter_266, + parameter_267, + parameter_268, + parameter_269, + parameter_270, + parameter_271, + parameter_272, + parameter_273, + parameter_274, + parameter_275, + parameter_276, + parameter_277, + parameter_278, + parameter_279, + parameter_280, + parameter_281, + parameter_282, + parameter_283, + parameter_284, + parameter_285, + parameter_286, + parameter_287, + parameter_288, + parameter_289, + parameter_290, + parameter_291, + parameter_292, + parameter_293, + parameter_294, + parameter_295, + parameter_296, + parameter_297, + parameter_298, + parameter_299, + parameter_300, + parameter_301, + parameter_302, + parameter_303, + parameter_304, + data_0, + data_1, + data_2, + data_3, + data_4, + data_5, + data_6, + data_7, + data_8, + data_9, + data_10, + data_11, + data_12, + data_13, + data_14, + data_15, + data_16, + data_17, + data_18, + data_19, + data_20, + data_21, + data_22, + data_23, + data_24, + data_25, + data_26, + data_27, + data_28, + data_29, + data_30, + data_31, + data_32, + data_33, + data_34, + data_35, + data_36, + data_37, + data_38, + data_39, + data_40, + data_41, + data_42, + data_43, + data_44, + data_45, + data_46, + data_47, + data_48, + ): + # pd_op.shape64: (4xi64) <- (-1x3x224x224xf32) + shape64_0 = paddle._C_ops.shape64(data_0) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_7 = [0] + + # pd_op.assign: (1xi64) <- (1xi64) + assign_266 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_259 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_256 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_249 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_233 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_226 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_223 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_216 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_213 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_206 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_203 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_196 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_193 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_186 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_183 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_176 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_173 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_166 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_163 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_156 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_153 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_146 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_143 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_136 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_133 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_126 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_123 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_116 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_113 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_106 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_103 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_96 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_93 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_86 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_83 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_76 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_73 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_66 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_63 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_56 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_40 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_33 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_30 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_23 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_12 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_5 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_4 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_0 = full_int_array_7 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_8 = [1] + + # pd_op.assign: (1xi64) <- (1xi64) + assign_261 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_260 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_251 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_250 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_228 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_227 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_218 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_217 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_208 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_207 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_198 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_197 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_188 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_187 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_178 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_177 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_168 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_167 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_158 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_157 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_148 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_147 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_138 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_137 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_128 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_127 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_118 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_117 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_108 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_107 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_98 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_97 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_88 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_87 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_78 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_77 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_68 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_67 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_58 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_57 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_35 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_34 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_25 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_24 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_7 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_6 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_2 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_1 = full_int_array_8 + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_24 = paddle._C_ops.slice( + shape64_0, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_0 + + # pd_op.conv2d: (-1x96x56x56xf32) <- (-1x3x224x224xf32, 96x3x4x4xf32) + conv2d_0 = paddle._C_ops.conv2d( + data_0, parameter_304, [4, 4], [0, 0], "EXPLICIT", [1, 1], 1, "NCHW" + ) + del data_0, parameter_304 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_9 = [1, -1, 1, 1] + + # pd_op.reshape: (1x96x1x1xf32) <- (96xf32, 4xi64) + reshape_0 = paddle._C_ops.reshape(parameter_303, full_int_array_9) + del full_int_array_9, parameter_303 + + # pd_op.add: (-1x96x56x56xf32) <- (-1x96x56x56xf32, 1x96x1x1xf32) + add_0 = paddle._C_ops.add(conv2d_0, reshape_0) + + # pd_op.shape64: (4xi64) <- (-1x96x56x56xf32) + shape64_1 = paddle._C_ops.shape64(add_0) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_25 = paddle._C_ops.slice( + shape64_1, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_1 + + # pd_op.flatten: (-1x96x3136xf32) <- (-1x96x56x56xf32) + flatten_1 = paddle._C_ops.flatten(add_0, 2, 3) + + # pd_op.transpose: (-1x3136x96xf32) <- (-1x96x3136xf32) + transpose_0 = paddle._C_ops.transpose(flatten_1, [0, 2, 1]) + del flatten_1 + + # pd_op.layer_norm: (-1x3136x96xf32, -1x3136xf32, -1x3136xf32) <- (-1x3136x96xf32, 96xf32, 96xf32) + layer_norm_0, layer_norm_1, layer_norm_2 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + transpose_0, parameter_302, parameter_301, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_301, parameter_302 + + # pd_op.shape64: (3xi64) <- (-1x3136x96xf32) + shape64_2 = paddle._C_ops.shape64(layer_norm_0) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_26 = paddle._C_ops.slice( + shape64_2, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_2 + + # pd_op.layer_norm: (-1x3136x96xf32, -1x3136xf32, -1x3136xf32) <- (-1x3136x96xf32, 96xf32, 96xf32) + layer_norm_3, layer_norm_4, layer_norm_5 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + layer_norm_0, parameter_300, parameter_299, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_299, parameter_300 + + # pd_op.full: (xi64) <- () + full_25 = paddle._C_ops.full( + [], float("56"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_26 = paddle._C_ops.full( + [], float("96"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_0 = [slice_26, full_25, full_25, full_26] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_0 = paddle._C_ops.stack(combine_0, 0) + del combine_0 + + # pd_op.reshape: (-1x56x56x96xf32) <- (-1x3136x96xf32, 4xi64) + reshape_1 = paddle._C_ops.reshape(layer_norm_3, stack_0) + del stack_0 + + # pd_op.shape64: (4xi64) <- (-1x56x56x96xf32) + shape64_3 = paddle._C_ops.shape64(reshape_1) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_27 = paddle._C_ops.slice( + shape64_3, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_3 + + # pd_op.full: (xi64) <- () + full_27 = paddle._C_ops.full( + [], float("8"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_28 = paddle._C_ops.full( + [], float("7"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_1 = [slice_27, full_27, full_28, full_27, full_28, full_26] + del slice_27 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_1 = paddle._C_ops.stack(combine_1, 0) + del combine_1 + + # pd_op.reshape: (-1x8x7x8x7x96xf32) <- (-1x56x56x96xf32, 6xi64) + reshape_211 = paddle._C_ops.reshape(reshape_1, stack_1) + del stack_1 + + # pd_op.transpose: (-1x8x8x7x7x96xf32) <- (-1x8x7x8x7x96xf32) + transpose_1 = paddle._C_ops.transpose(reshape_211, [0, 1, 3, 2, 4, 5]) + del reshape_211 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_10 = [-1, 7, 7, 96] + + # pd_op.reshape: (-1x7x7x96xf32) <- (-1x8x8x7x7x96xf32, 4xi64) + reshape_2 = paddle._C_ops.reshape(transpose_1, full_int_array_10) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_11 = [-1, 49, 96] + + # pd_op.reshape: (-1x49x96xf32) <- (-1x7x7x96xf32, 3xi64) + reshape_3 = paddle._C_ops.reshape(reshape_2, full_int_array_11) + + # pd_op.shape64: (3xi64) <- (-1x49x96xf32) + shape64_4 = paddle._C_ops.shape64(reshape_3) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_28 = paddle._C_ops.slice( + shape64_4, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_4 + + # pd_op.matmul: (-1x49x288xf32) <- (-1x49x96xf32, 96x288xf32) + matmul_0 = paddle._C_ops.matmul(reshape_3, parameter_298, False, False) + del parameter_298 + + # pd_op.add: (-1x49x288xf32) <- (-1x49x288xf32, 288xf32) + add_1 = paddle._C_ops.add(matmul_0, parameter_297) + del parameter_297 + + # pd_op.full: (xi64) <- () + full_29 = paddle._C_ops.full( + [], float("49"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_30 = paddle._C_ops.full( + [], float("3"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_31 = paddle._C_ops.full( + [], float("32"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_2 = [slice_28, full_29, full_30, full_30, full_31] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_2 = paddle._C_ops.stack(combine_2, 0) + del combine_2 + + # pd_op.reshape: (-1x49x3x3x32xf32) <- (-1x49x288xf32, 5xi64) + reshape_212 = paddle._C_ops.reshape(add_1, stack_2) + del stack_2 + + # pd_op.transpose: (3x-1x3x49x32xf32) <- (-1x49x3x3x32xf32) + transpose_2 = paddle._C_ops.transpose(reshape_212, [2, 0, 3, 1, 4]) + del reshape_212 + + # pd_op.slice: (-1x3x49x32xf32) <- (3x-1x3x49x32xf32, 1xi64, 1xi64) + slice_29 = paddle._C_ops.slice( + transpose_2, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_0 = [2] + + # pd_op.assign: (1xi64) <- (1xi64) + assign_271 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_269 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_263 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_262 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_253 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_252 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_230 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_229 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_220 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_219 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_210 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_209 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_200 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_199 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_190 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_189 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_180 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_179 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_170 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_169 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_160 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_159 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_150 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_149 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_140 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_139 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_130 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_129 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_120 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_119 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_110 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_109 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_100 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_99 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_90 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_89 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_80 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_79 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_70 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_69 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_60 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_59 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_37 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_36 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_27 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_26 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_9 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_8 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_3 = full_int_array_0 + + # pd_op.slice: (-1x3x49x32xf32) <- (3x-1x3x49x32xf32, 1xi64, 1xi64) + slice_30 = paddle._C_ops.slice( + transpose_2, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_1 = [3] + + # pd_op.assign: (1xi64) <- (1xi64) + assign_264 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_254 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_231 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_221 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_211 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_201 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_191 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_181 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_171 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_161 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_151 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_141 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_131 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_121 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_111 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_101 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_91 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_81 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_71 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_61 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_38 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_28 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_10 = full_int_array_1 + + # pd_op.slice: (-1x3x49x32xf32) <- (3x-1x3x49x32xf32, 1xi64, 1xi64) + slice_0 = paddle._C_ops.slice( + transpose_2, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.full: (1xf32) <- () + full_0 = paddle._C_ops.full( + [1], float("0.176777"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.assign: (1xf32) <- (1xf32) + assign_265 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_255 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_232 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_222 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_212 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_202 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_192 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_182 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_172 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_162 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_152 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_142 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_132 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_122 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_112 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_102 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_92 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_82 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_72 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_62 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_39 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_29 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_11 = full_0 + + # pd_op.scale: (-1x3x49x32xf32) <- (-1x3x49x32xf32, 1xf32) + scale_0 = paddle._C_ops.scale(slice_29, full_0, float("0"), True) + del slice_29 + + # pd_op.transpose: (-1x3x32x49xf32) <- (-1x3x49x32xf32) + transpose_3 = paddle._C_ops.transpose(slice_30, [0, 1, 3, 2]) + del slice_30 + + # pd_op.matmul: (-1x3x49x49xf32) <- (-1x3x49x32xf32, -1x3x32x49xf32) + matmul_1 = paddle._C_ops.matmul(scale_0, transpose_3, False, False) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_12 = [-1] + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_4 = paddle._C_ops.reshape(data_1, full_int_array_12) + del data_1 + + # pd_op.index_select: (2401x3xf32) <- (169x3xf32, 2401xi64) + index_select_0 = paddle._C_ops.index_select(data_2, reshape_4, 0) + del data_2 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_13 = [49, 49, -1] + + # pd_op.reshape: (49x49x3xf32) <- (2401x3xf32, 3xi64) + reshape_213 = paddle._C_ops.reshape(index_select_0, full_int_array_13) + + # pd_op.transpose: (3x49x49xf32) <- (49x49x3xf32) + transpose_4 = paddle._C_ops.transpose(reshape_213, [2, 0, 1]) + del reshape_213 + + # pd_op.unsqueeze: (1x3x49x49xf32) <- (3x49x49xf32, 1xi64) + unsqueeze_0 = paddle._C_ops.unsqueeze(transpose_4, full_int_array_7) + + # pd_op.add: (-1x3x49x49xf32) <- (-1x3x49x49xf32, 1x3x49x49xf32) + add_170 = paddle._C_ops.add(matmul_1, unsqueeze_0) + + # pd_op.softmax: (-1x3x49x49xf32) <- (-1x3x49x49xf32) + softmax_0 = paddle._C_ops.softmax(add_170, -1) + del add_170 + + # pd_op.matmul: (-1x3x49x32xf32) <- (-1x3x49x49xf32, -1x3x49x32xf32) + matmul_124 = paddle._C_ops.matmul(softmax_0, slice_0, False, False) + + # pd_op.transpose: (-1x49x3x32xf32) <- (-1x3x49x32xf32) + transpose_5 = paddle._C_ops.transpose(matmul_124, [0, 2, 1, 3]) + del matmul_124 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_3 = [slice_28, full_29, full_26] + del slice_28 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_3 = paddle._C_ops.stack(combine_3, 0) + del combine_3 + + # pd_op.reshape: (-1x49x96xf32) <- (-1x49x3x32xf32, 3xi64) + reshape_5 = paddle._C_ops.reshape(transpose_5, stack_3) + del stack_3 + + # pd_op.matmul: (-1x49x96xf32) <- (-1x49x96xf32, 96x96xf32) + matmul_2 = paddle._C_ops.matmul(reshape_5, parameter_296, False, False) + del parameter_296 + + # pd_op.add: (-1x49x96xf32) <- (-1x49x96xf32, 96xf32) + add_2 = paddle._C_ops.add(matmul_2, parameter_295) + del parameter_295 + + # pd_op.reshape: (-1x7x7x96xf32) <- (-1x49x96xf32, 4xi64) + reshape_6 = paddle._C_ops.reshape(add_2, full_int_array_10) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_14 = [-1, 8, 8, 7, 7, 96] + + # pd_op.reshape: (-1x8x8x7x7x96xf32) <- (-1x7x7x96xf32, 6xi64) + reshape_214 = paddle._C_ops.reshape(reshape_6, full_int_array_14) + + # pd_op.transpose: (-1x8x7x8x7x96xf32) <- (-1x8x8x7x7x96xf32) + transpose_6 = paddle._C_ops.transpose(reshape_214, [0, 1, 3, 2, 4, 5]) + del reshape_214 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_15 = [-1, 56, 56, 96] + + # pd_op.reshape: (-1x56x56x96xf32) <- (-1x8x7x8x7x96xf32, 4xi64) + reshape_7 = paddle._C_ops.reshape(transpose_6, full_int_array_15) + + # pd_op.full: (xi64) <- () + full_32 = paddle._C_ops.full( + [], float("3136"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_4 = [slice_26, full_32, full_26] + del slice_26 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_4 = paddle._C_ops.stack(combine_4, 0) + del combine_4 + + # pd_op.reshape: (-1x3136x96xf32) <- (-1x56x56x96xf32, 3xi64) + reshape_8 = paddle._C_ops.reshape(reshape_7, stack_4) + del stack_4 + + # pd_op.add: (-1x3136x96xf32) <- (-1x3136x96xf32, -1x3136x96xf32) + add_3 = paddle._C_ops.add(layer_norm_0, reshape_8) + + # pd_op.layer_norm: (-1x3136x96xf32, -1x3136xf32, -1x3136xf32) <- (-1x3136x96xf32, 96xf32, 96xf32) + layer_norm_6, layer_norm_7, layer_norm_8 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_3, parameter_294, parameter_293, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_293, parameter_294 + + # pd_op.matmul: (-1x3136x384xf32) <- (-1x3136x96xf32, 96x384xf32) + matmul_3 = paddle._C_ops.matmul(layer_norm_6, parameter_292, False, False) + del parameter_292 + + # pd_op.add: (-1x3136x384xf32) <- (-1x3136x384xf32, 384xf32) + add_4 = paddle._C_ops.add(matmul_3, parameter_291) + del parameter_291 + + # pd_op.gelu: (-1x3136x384xf32) <- (-1x3136x384xf32) + gelu_0 = paddle._C_ops.gelu(add_4, False) + + # pd_op.matmul: (-1x3136x96xf32) <- (-1x3136x384xf32, 384x96xf32) + matmul_4 = paddle._C_ops.matmul(gelu_0, parameter_290, False, False) + del parameter_290 + + # pd_op.add: (-1x3136x96xf32) <- (-1x3136x96xf32, 96xf32) + add_5 = paddle._C_ops.add(matmul_4, parameter_289) + del parameter_289 + + # pd_op.add: (-1x3136x96xf32) <- (-1x3136x96xf32, -1x3136x96xf32) + add_6 = paddle._C_ops.add(add_3, add_5) + + # pd_op.shape64: (3xi64) <- (-1x3136x96xf32) + shape64_5 = paddle._C_ops.shape64(add_6) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_31 = paddle._C_ops.slice( + shape64_5, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_5 + + # pd_op.layer_norm: (-1x3136x96xf32, -1x3136xf32, -1x3136xf32) <- (-1x3136x96xf32, 96xf32, 96xf32) + layer_norm_9, layer_norm_10, layer_norm_11 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_6, parameter_288, parameter_287, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_287, parameter_288 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_5 = [slice_31, full_25, full_25, full_26] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_5 = paddle._C_ops.stack(combine_5, 0) + del combine_5 + + # pd_op.reshape: (-1x56x56x96xf32) <- (-1x3136x96xf32, 4xi64) + reshape_9 = paddle._C_ops.reshape(layer_norm_9, stack_5) + del stack_5 + + # pd_op.shape64: (4xi64) <- (-1x56x56x96xf32) + shape64_6 = paddle._C_ops.shape64(reshape_9) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_32 = paddle._C_ops.slice( + shape64_6, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_6 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_2 = [-3, -3] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_258 = full_int_array_2 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_225 = full_int_array_2 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_205 = full_int_array_2 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_185 = full_int_array_2 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_165 = full_int_array_2 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_145 = full_int_array_2 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_125 = full_int_array_2 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_105 = full_int_array_2 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_85 = full_int_array_2 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_65 = full_int_array_2 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_32 = full_int_array_2 + + # pd_op.roll: (-1x56x56x96xf32) <- (-1x56x56x96xf32, 2xi64) + roll_0 = paddle._C_ops.roll(reshape_9, full_int_array_2, [1, 2]) + + # pd_op.shape64: (4xi64) <- (-1x56x56x96xf32) + shape64_7 = paddle._C_ops.shape64(roll_0) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_33 = paddle._C_ops.slice( + shape64_7, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_7 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_6 = [slice_33, full_27, full_28, full_27, full_28, full_26] + del full_27, slice_33 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_6 = paddle._C_ops.stack(combine_6, 0) + del combine_6 + + # pd_op.reshape: (-1x8x7x8x7x96xf32) <- (-1x56x56x96xf32, 6xi64) + reshape_215 = paddle._C_ops.reshape(roll_0, stack_6) + del stack_6 + + # pd_op.transpose: (-1x8x8x7x7x96xf32) <- (-1x8x7x8x7x96xf32) + transpose_7 = paddle._C_ops.transpose(reshape_215, [0, 1, 3, 2, 4, 5]) + del reshape_215 + + # pd_op.reshape: (-1x7x7x96xf32) <- (-1x8x8x7x7x96xf32, 4xi64) + reshape_10 = paddle._C_ops.reshape(transpose_7, full_int_array_10) + + # pd_op.reshape: (-1x49x96xf32) <- (-1x7x7x96xf32, 3xi64) + reshape_11 = paddle._C_ops.reshape(reshape_10, full_int_array_11) + del full_int_array_11 + + # pd_op.full: (1x56x56x1xf32) <- () + full_33 = paddle._C_ops.full( + [1, 56, 56, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_16 = [0, 0] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_236 = full_int_array_16 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_43 = full_int_array_16 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_14 = full_int_array_16 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_17 = [-7, -7] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_18 = [1, 1] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_270 = full_int_array_18 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_245 = full_int_array_18 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_52 = full_int_array_18 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_20 = full_int_array_18 + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__12 = paddle._C_ops.set_value_( + full_33, + full_int_array_16, + full_int_array_17, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_33 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_19 = [0, -7] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_20 = [-7, -3] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__13 = paddle._C_ops.set_value_( + set_value__12, + full_int_array_19, + full_int_array_20, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__12 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_21 = [0, -3] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_22 = [-7, 2147483647] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__14 = paddle._C_ops.set_value_( + set_value__13, + full_int_array_21, + full_int_array_22, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__13 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_23 = [-7, 0] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_24 = [-3, -7] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__15 = paddle._C_ops.set_value_( + set_value__14, + full_int_array_23, + full_int_array_24, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__14 + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__16 = paddle._C_ops.set_value_( + set_value__15, + full_int_array_17, + full_int_array_2, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__15 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_25 = [-3, 2147483647] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__17 = paddle._C_ops.set_value_( + set_value__16, + full_int_array_20, + full_int_array_25, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__16 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_26 = [-3, 0] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_27 = [2147483647, -7] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__18 = paddle._C_ops.set_value_( + set_value__17, + full_int_array_26, + full_int_array_27, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__17 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_28 = [2147483647, -3] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__19 = paddle._C_ops.set_value_( + set_value__18, + full_int_array_24, + full_int_array_28, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__18 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_29 = [2147483647, 2147483647] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_246 = full_int_array_29 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_243 = full_int_array_29 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_240 = full_int_array_29 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_237 = full_int_array_29 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_53 = full_int_array_29 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_50 = full_int_array_29 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_47 = full_int_array_29 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_44 = full_int_array_29 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_21 = full_int_array_29 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_18 = full_int_array_29 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_16 = full_int_array_29 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_15 = full_int_array_29 + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__0 = paddle._C_ops.set_value_( + set_value__19, + full_int_array_2, + full_int_array_29, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__19 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_30 = [1, 8, 7, 8, 7, 1] + + # pd_op.reshape: (1x8x7x8x7x1xf32) <- (1x56x56x1xf32, 6xi64) + reshape_216 = paddle._C_ops.reshape(set_value__0, full_int_array_30) + del full_int_array_30 + + # pd_op.transpose: (1x8x8x7x7x1xf32) <- (1x8x7x8x7x1xf32) + transpose_146 = paddle._C_ops.transpose(reshape_216, [0, 1, 3, 2, 4, 5]) + del reshape_216 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_31 = [-1, 7, 7, 1] + + # pd_op.reshape: (64x7x7x1xf32) <- (1x8x8x7x7x1xf32, 4xi64) + reshape_217 = paddle._C_ops.reshape(transpose_146, full_int_array_31) + del transpose_146 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_32 = [-1, 49] + + # pd_op.reshape: (64x49xf32) <- (64x7x7x1xf32, 2xi64) + reshape_218 = paddle._C_ops.reshape(reshape_217, full_int_array_32) + del reshape_217 + + # pd_op.unsqueeze: (64x1x49xf32) <- (64x49xf32, 1xi64) + unsqueeze_37 = paddle._C_ops.unsqueeze(reshape_218, full_int_array_8) + + # pd_op.unsqueeze: (64x49x1xf32) <- (64x49xf32, 1xi64) + unsqueeze_38 = paddle._C_ops.unsqueeze(reshape_218, full_int_array_0) + del reshape_218 + + # pd_op.subtract: (64x49x49xf32) <- (64x1x49xf32, 64x49x1xf32) + subtract_0 = paddle._C_ops.subtract(unsqueeze_37, unsqueeze_38) + del unsqueeze_37, unsqueeze_38 + + # pd_op.full: (xf32) <- () + full_34 = paddle._C_ops.full( + [], float("0"), paddle.float32, paddle.framework._current_expected_place() + ) + + # pd_op.not_equal: (64x49x49xb) <- (64x49x49xf32, xf32) + not_equal_0 = paddle._C_ops.not_equal(subtract_0, full_34) + + # pd_op.full: (64x49x49xf32) <- () + full_35 = paddle._C_ops.full( + [64, 49, 49], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (64x49x49xf32) <- (64x49x49xb, 64x49x49xf32, 64x49x49xf32) + where_0 = paddle._C_ops.where(not_equal_0, full_35, subtract_0) + del full_35, not_equal_0, subtract_0 + + # pd_op.equal: (64x49x49xb) <- (64x49x49xf32, xf32) + equal_0 = paddle._C_ops.equal(where_0, full_34) + + # pd_op.full: (64x49x49xf32) <- () + full_36 = paddle._C_ops.full( + [64, 49, 49], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (64x49x49xf32) <- (64x49x49xb, 64x49x49xf32, 64x49x49xf32) + where_1 = paddle._C_ops.where(equal_0, full_36, where_0) + del equal_0, full_36, where_0 + + # pd_op.shape64: (3xi64) <- (-1x49x96xf32) + shape64_8 = paddle._C_ops.shape64(reshape_11) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_34 = paddle._C_ops.slice( + shape64_8, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_8 + + # pd_op.matmul: (-1x49x288xf32) <- (-1x49x96xf32, 96x288xf32) + matmul_5 = paddle._C_ops.matmul(reshape_11, parameter_286, False, False) + del parameter_286 + + # pd_op.add: (-1x49x288xf32) <- (-1x49x288xf32, 288xf32) + add_7 = paddle._C_ops.add(matmul_5, parameter_285) + del parameter_285 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_7 = [slice_34, full_29, full_30, full_30, full_31] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_7 = paddle._C_ops.stack(combine_7, 0) + del combine_7 + + # pd_op.reshape: (-1x49x3x3x32xf32) <- (-1x49x288xf32, 5xi64) + reshape_219 = paddle._C_ops.reshape(add_7, stack_7) + del stack_7 + + # pd_op.transpose: (3x-1x3x49x32xf32) <- (-1x49x3x3x32xf32) + transpose_8 = paddle._C_ops.transpose(reshape_219, [2, 0, 3, 1, 4]) + del reshape_219 + + # pd_op.slice: (-1x3x49x32xf32) <- (3x-1x3x49x32xf32, 1xi64, 1xi64) + slice_35 = paddle._C_ops.slice( + transpose_8, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x3x49x32xf32) <- (3x-1x3x49x32xf32, 1xi64, 1xi64) + slice_36 = paddle._C_ops.slice( + transpose_8, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x3x49x32xf32) <- (3x-1x3x49x32xf32, 1xi64, 1xi64) + slice_1 = paddle._C_ops.slice( + transpose_8, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x3x49x32xf32) <- (-1x3x49x32xf32, 1xf32) + scale_1 = paddle._C_ops.scale(slice_35, full_0, float("0"), True) + del slice_35 + + # pd_op.transpose: (-1x3x32x49xf32) <- (-1x3x49x32xf32) + transpose_9 = paddle._C_ops.transpose(slice_36, [0, 1, 3, 2]) + del slice_36 + + # pd_op.matmul: (-1x3x49x49xf32) <- (-1x3x49x32xf32, -1x3x32x49xf32) + matmul_6 = paddle._C_ops.matmul(scale_1, transpose_9, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_12 = paddle._C_ops.reshape(data_3, full_int_array_12) + del data_3 + + # pd_op.index_select: (2401x3xf32) <- (169x3xf32, 2401xi64) + index_select_1 = paddle._C_ops.index_select(data_4, reshape_12, 0) + del data_4 + + # pd_op.reshape: (49x49x3xf32) <- (2401x3xf32, 3xi64) + reshape_220 = paddle._C_ops.reshape(index_select_1, full_int_array_13) + + # pd_op.transpose: (3x49x49xf32) <- (49x49x3xf32) + transpose_10 = paddle._C_ops.transpose(reshape_220, [2, 0, 1]) + del reshape_220 + + # pd_op.unsqueeze: (1x3x49x49xf32) <- (3x49x49xf32, 1xi64) + unsqueeze_1 = paddle._C_ops.unsqueeze(transpose_10, full_int_array_7) + + # pd_op.add: (-1x3x49x49xf32) <- (-1x3x49x49xf32, 1x3x49x49xf32) + add_8 = paddle._C_ops.add(matmul_6, unsqueeze_1) + + # pd_op.full: (xi64) <- () + full_37 = paddle._C_ops.full( + [], float("64"), paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_0 = paddle._C_ops.floor_divide(slice_34, full_37) + del full_37 + + # pd_op.full: (xi64) <- () + full_38 = paddle._C_ops.full( + [], float("64"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_8 = [floor_divide_0, full_38, full_30, full_29, full_29] + del floor_divide_0, full_38 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_8 = paddle._C_ops.stack(combine_8, 0) + del combine_8 + + # pd_op.reshape: (-1x64x3x49x49xf32) <- (-1x3x49x49xf32, 5xi64) + reshape_13 = paddle._C_ops.reshape(add_8, stack_8) + del stack_8 + + # pd_op.unsqueeze: (64x1x49x49xf32) <- (64x49x49xf32, 1xi64) + unsqueeze_39 = paddle._C_ops.unsqueeze(where_1, full_int_array_8) + del where_1 + + # pd_op.unsqueeze: (1x64x1x49x49xf32) <- (64x1x49x49xf32, 1xi64) + unsqueeze_2 = paddle._C_ops.unsqueeze(unsqueeze_39, full_int_array_7) + del unsqueeze_39 + + # pd_op.add: (-1x64x3x49x49xf32) <- (-1x64x3x49x49xf32, 1x64x1x49x49xf32) + add_9 = paddle._C_ops.add(reshape_13, unsqueeze_2) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_9 = [slice_34, full_30, full_29, full_29] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_9 = paddle._C_ops.stack(combine_9, 0) + del combine_9 + + # pd_op.reshape: (-1x3x49x49xf32) <- (-1x64x3x49x49xf32, 4xi64) + reshape_221 = paddle._C_ops.reshape(add_9, stack_9) + del stack_9 + + # pd_op.softmax: (-1x3x49x49xf32) <- (-1x3x49x49xf32) + softmax_1 = paddle._C_ops.softmax(reshape_221, -1) + del reshape_221 + + # pd_op.matmul: (-1x3x49x32xf32) <- (-1x3x49x49xf32, -1x3x49x32xf32) + matmul_125 = paddle._C_ops.matmul(softmax_1, slice_1, False, False) + + # pd_op.transpose: (-1x49x3x32xf32) <- (-1x3x49x32xf32) + transpose_11 = paddle._C_ops.transpose(matmul_125, [0, 2, 1, 3]) + del matmul_125 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_10 = [slice_34, full_29, full_26] + del slice_34 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_10 = paddle._C_ops.stack(combine_10, 0) + del combine_10 + + # pd_op.reshape: (-1x49x96xf32) <- (-1x49x3x32xf32, 3xi64) + reshape_14 = paddle._C_ops.reshape(transpose_11, stack_10) + del stack_10 + + # pd_op.matmul: (-1x49x96xf32) <- (-1x49x96xf32, 96x96xf32) + matmul_7 = paddle._C_ops.matmul(reshape_14, parameter_284, False, False) + del parameter_284 + + # pd_op.add: (-1x49x96xf32) <- (-1x49x96xf32, 96xf32) + add_10 = paddle._C_ops.add(matmul_7, parameter_283) + del parameter_283 + + # pd_op.reshape: (-1x7x7x96xf32) <- (-1x49x96xf32, 4xi64) + reshape_15 = paddle._C_ops.reshape(add_10, full_int_array_10) + del full_int_array_10 + + # pd_op.reshape: (-1x8x8x7x7x96xf32) <- (-1x7x7x96xf32, 6xi64) + reshape_222 = paddle._C_ops.reshape(reshape_15, full_int_array_14) + del full_int_array_14 + + # pd_op.transpose: (-1x8x7x8x7x96xf32) <- (-1x8x8x7x7x96xf32) + transpose_12 = paddle._C_ops.transpose(reshape_222, [0, 1, 3, 2, 4, 5]) + del reshape_222 + + # pd_op.reshape: (-1x56x56x96xf32) <- (-1x8x7x8x7x96xf32, 4xi64) + reshape_16 = paddle._C_ops.reshape(transpose_12, full_int_array_15) + del full_int_array_15 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_3 = [3, 3] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_267 = full_int_array_3 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_234 = full_int_array_3 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_214 = full_int_array_3 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_194 = full_int_array_3 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_174 = full_int_array_3 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_154 = full_int_array_3 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_134 = full_int_array_3 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_114 = full_int_array_3 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_94 = full_int_array_3 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_74 = full_int_array_3 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_41 = full_int_array_3 + + # pd_op.roll: (-1x56x56x96xf32) <- (-1x56x56x96xf32, 2xi64) + roll_1 = paddle._C_ops.roll(reshape_16, full_int_array_3, [1, 2]) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_11 = [slice_31, full_32, full_26] + del full_32, slice_31 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_11 = paddle._C_ops.stack(combine_11, 0) + del combine_11 + + # pd_op.reshape: (-1x3136x96xf32) <- (-1x56x56x96xf32, 3xi64) + reshape_17 = paddle._C_ops.reshape(roll_1, stack_11) + del stack_11 + + # pd_op.full: (xf32) <- () + full_1 = paddle._C_ops.full( + [], + float("0.986957"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_13 = full_1 + + # pd_op.shape64: (3xi64) <- (-1x3136x96xf32) + shape64_9 = paddle._C_ops.shape64(reshape_17) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_37 = paddle._C_ops.slice( + shape64_9, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_9 + + # pd_op.full: (xi64) <- () + full_39 = paddle._C_ops.full( + [], float("1"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_12 = [slice_37, full_39, full_39] + del slice_37 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_12 = paddle._C_ops.stack(combine_12, 0) + del combine_12 + + # pd_op.full: (1xf32) <- () + full_40 = paddle._C_ops.full( + [1], float("0"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.full: (1xf32) <- () + full_41 = paddle._C_ops.full( + [1], float("1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_0 = paddle._C_ops.uniform( + stack_12, + paddle.float32, + full_40, + full_41, + 0, + paddle.framework._current_expected_place(), + ) + del stack_12 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_171 = paddle._C_ops.add(full_1, uniform_0) + del uniform_0 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_0 = paddle._C_ops.floor(add_171) + del add_171 + + # pd_op.divide: (-1x3136x96xf32) <- (-1x3136x96xf32, xf32) + divide_0 = paddle._C_ops.divide(reshape_17, full_1) + + # pd_op.multiply: (-1x3136x96xf32) <- (-1x3136x96xf32, -1x1x1xf32) + multiply_0 = paddle._C_ops.multiply(divide_0, floor_0) + + # pd_op.add: (-1x3136x96xf32) <- (-1x3136x96xf32, -1x3136x96xf32) + add_11 = paddle._C_ops.add(add_6, multiply_0) + + # pd_op.layer_norm: (-1x3136x96xf32, -1x3136xf32, -1x3136xf32) <- (-1x3136x96xf32, 96xf32, 96xf32) + layer_norm_12, layer_norm_13, layer_norm_14 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_11, parameter_282, parameter_281, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_281, parameter_282 + + # pd_op.matmul: (-1x3136x384xf32) <- (-1x3136x96xf32, 96x384xf32) + matmul_8 = paddle._C_ops.matmul(layer_norm_12, parameter_280, False, False) + del parameter_280 + + # pd_op.add: (-1x3136x384xf32) <- (-1x3136x384xf32, 384xf32) + add_12 = paddle._C_ops.add(matmul_8, parameter_279) + del parameter_279 + + # pd_op.gelu: (-1x3136x384xf32) <- (-1x3136x384xf32) + gelu_1 = paddle._C_ops.gelu(add_12, False) + + # pd_op.matmul: (-1x3136x96xf32) <- (-1x3136x384xf32, 384x96xf32) + matmul_9 = paddle._C_ops.matmul(gelu_1, parameter_278, False, False) + del parameter_278 + + # pd_op.add: (-1x3136x96xf32) <- (-1x3136x96xf32, 96xf32) + add_13 = paddle._C_ops.add(matmul_9, parameter_277) + del parameter_277 + + # pd_op.shape64: (3xi64) <- (-1x3136x96xf32) + shape64_10 = paddle._C_ops.shape64(add_13) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_38 = paddle._C_ops.slice( + shape64_10, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_10 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_13 = [slice_38, full_39, full_39] + del slice_38 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_13 = paddle._C_ops.stack(combine_13, 0) + del combine_13 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_1 = paddle._C_ops.uniform( + stack_13, + paddle.float32, + full_40, + full_41, + 0, + paddle.framework._current_expected_place(), + ) + del stack_13 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_172 = paddle._C_ops.add(full_1, uniform_1) + del uniform_1 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_1 = paddle._C_ops.floor(add_172) + del add_172 + + # pd_op.divide: (-1x3136x96xf32) <- (-1x3136x96xf32, xf32) + divide_1 = paddle._C_ops.divide(add_13, full_1) + + # pd_op.multiply: (-1x3136x96xf32) <- (-1x3136x96xf32, -1x1x1xf32) + multiply_1 = paddle._C_ops.multiply(divide_1, floor_1) + + # pd_op.add: (-1x3136x96xf32) <- (-1x3136x96xf32, -1x3136x96xf32) + add_14 = paddle._C_ops.add(add_11, multiply_1) + + # pd_op.shape64: (3xi64) <- (-1x3136x96xf32) + shape64_11 = paddle._C_ops.shape64(add_14) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_39 = paddle._C_ops.slice( + shape64_11, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_11 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_14 = [slice_39, full_25, full_25, full_26] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_14 = paddle._C_ops.stack(combine_14, 0) + del combine_14 + + # pd_op.reshape: (-1x56x56x96xf32) <- (-1x3136x96xf32, 4xi64) + reshape_18 = paddle._C_ops.reshape(add_14, stack_14) + del stack_14 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_4 = [2, 2] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_247 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_244 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_241 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_238 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_54 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_51 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_48 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_45 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_22 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_19 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_17 = full_int_array_4 + + # pd_op.strided_slice: (-1x28x28x96xf32) <- (-1x56x56x96xf32, 2xi64, 2xi64, 2xi64) + strided_slice_0 = paddle._C_ops.strided_slice( + reshape_18, [1, 2], full_int_array_16, full_int_array_29, full_int_array_4 + ) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_5 = [1, 0] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_239 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_46 = full_int_array_5 + + # pd_op.strided_slice: (-1x28x28x96xf32) <- (-1x56x56x96xf32, 2xi64, 2xi64, 2xi64) + strided_slice_1 = paddle._C_ops.strided_slice( + reshape_18, [1, 2], full_int_array_5, full_int_array_29, full_int_array_4 + ) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_6 = [0, 1] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_242 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_49 = full_int_array_6 + + # pd_op.strided_slice: (-1x28x28x96xf32) <- (-1x56x56x96xf32, 2xi64, 2xi64, 2xi64) + strided_slice_2 = paddle._C_ops.strided_slice( + reshape_18, [1, 2], full_int_array_6, full_int_array_29, full_int_array_4 + ) + + # pd_op.strided_slice: (-1x28x28x96xf32) <- (-1x56x56x96xf32, 2xi64, 2xi64, 2xi64) + strided_slice_3 = paddle._C_ops.strided_slice( + reshape_18, [1, 2], full_int_array_18, full_int_array_29, full_int_array_4 + ) + + # pd_op.shape64: (4xi64) <- (-1x56x56x96xf32) + shape64_12 = paddle._C_ops.shape64(reshape_18) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_40 = paddle._C_ops.slice( + shape64_12, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_12 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_15 = [slice_40, full_25, full_25, full_26] + del full_25, full_26, slice_40 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_15 = paddle._C_ops.stack(combine_15, 0) + del combine_15 + + # pd_op.reshape: (-1x56x56x96xf32) <- (-1x56x56x96xf32, 4xi64) + reshape_223 = paddle._C_ops.reshape(reshape_18, stack_15) + del stack_15 + + # pd_op.full: (1xi32) <- () + full_2 = paddle._C_ops.full( + [1], float("-1"), paddle.int32, paddle.core.CPUPlace() + ) + + # pd_op.assign: (1xi32) <- (1xi32) + assign_248 = full_2 + + # pd_op.assign: (1xi32) <- (1xi32) + assign_55 = full_2 + + # builtin.combine: ([-1x28x28x96xf32, -1x28x28x96xf32, -1x28x28x96xf32, -1x28x28x96xf32]) <- (-1x28x28x96xf32, -1x28x28x96xf32, -1x28x28x96xf32, -1x28x28x96xf32) + combine_16 = [ + strided_slice_0, + strided_slice_1, + strided_slice_2, + strided_slice_3, + ] + + # pd_op.concat: (-1x28x28x384xf32) <- ([-1x28x28x96xf32, -1x28x28x96xf32, -1x28x28x96xf32, -1x28x28x96xf32], 1xi32) + concat_0 = paddle._C_ops.concat(combine_16, full_2) + del combine_16 + + # pd_op.full: (xi64) <- () + full_42 = paddle._C_ops.full( + [], float("-1"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_43 = paddle._C_ops.full( + [], float("384"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_17 = [slice_39, full_42, full_43] + del slice_39 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_16 = paddle._C_ops.stack(combine_17, 0) + del combine_17 + + # pd_op.reshape: (-1x-1x384xf32) <- (-1x28x28x384xf32, 3xi64) + reshape_19 = paddle._C_ops.reshape(concat_0, stack_16) + del stack_16 + + # pd_op.layer_norm: (-1x-1x384xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x384xf32, 384xf32, 384xf32) + layer_norm_15, layer_norm_16, layer_norm_17 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + reshape_19, parameter_276, parameter_275, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_275, parameter_276 + + # pd_op.matmul: (-1x-1x192xf32) <- (-1x-1x384xf32, 384x192xf32) + matmul_10 = paddle._C_ops.matmul(layer_norm_15, parameter_274, False, False) + del parameter_274 + + # pd_op.shape64: (3xi64) <- (-1x-1x192xf32) + shape64_13 = paddle._C_ops.shape64(matmul_10) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_41 = paddle._C_ops.slice( + shape64_13, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_13 + + # pd_op.shape64: (3xi64) <- (-1x-1x192xf32) + shape64_14 = paddle._C_ops.shape64(matmul_10) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_42 = paddle._C_ops.slice( + shape64_14, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + del shape64_14 + + # pd_op.layer_norm: (-1x-1x192xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x192xf32, 192xf32, 192xf32) + layer_norm_18, layer_norm_19, layer_norm_20 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + matmul_10, parameter_273, parameter_272, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_272, parameter_273 + + # pd_op.full: (xi64) <- () + full_44 = paddle._C_ops.full( + [], float("28"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_45 = paddle._C_ops.full( + [], float("192"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_18 = [slice_41, full_44, full_44, full_45] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_17 = paddle._C_ops.stack(combine_18, 0) + del combine_18 + + # pd_op.reshape: (-1x28x28x192xf32) <- (-1x-1x192xf32, 4xi64) + reshape_20 = paddle._C_ops.reshape(layer_norm_18, stack_17) + del stack_17 + + # pd_op.shape64: (4xi64) <- (-1x28x28x192xf32) + shape64_15 = paddle._C_ops.shape64(reshape_20) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_43 = paddle._C_ops.slice( + shape64_15, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_15 + + # pd_op.full: (xi64) <- () + full_46 = paddle._C_ops.full( + [], float("4"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_19 = [slice_43, full_46, full_28, full_46, full_28, full_45] + del slice_43 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_18 = paddle._C_ops.stack(combine_19, 0) + del combine_19 + + # pd_op.reshape: (-1x4x7x4x7x192xf32) <- (-1x28x28x192xf32, 6xi64) + reshape_224 = paddle._C_ops.reshape(reshape_20, stack_18) + del stack_18 + + # pd_op.transpose: (-1x4x4x7x7x192xf32) <- (-1x4x7x4x7x192xf32) + transpose_13 = paddle._C_ops.transpose(reshape_224, [0, 1, 3, 2, 4, 5]) + del reshape_224 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_33 = [-1, 7, 7, 192] + + # pd_op.reshape: (-1x7x7x192xf32) <- (-1x4x4x7x7x192xf32, 4xi64) + reshape_21 = paddle._C_ops.reshape(transpose_13, full_int_array_33) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_34 = [-1, 49, 192] + + # pd_op.reshape: (-1x49x192xf32) <- (-1x7x7x192xf32, 3xi64) + reshape_22 = paddle._C_ops.reshape(reshape_21, full_int_array_34) + + # pd_op.shape64: (3xi64) <- (-1x49x192xf32) + shape64_16 = paddle._C_ops.shape64(reshape_22) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_44 = paddle._C_ops.slice( + shape64_16, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_16 + + # pd_op.matmul: (-1x49x576xf32) <- (-1x49x192xf32, 192x576xf32) + matmul_11 = paddle._C_ops.matmul(reshape_22, parameter_271, False, False) + del parameter_271 + + # pd_op.add: (-1x49x576xf32) <- (-1x49x576xf32, 576xf32) + add_15 = paddle._C_ops.add(matmul_11, parameter_270) + del parameter_270 + + # pd_op.full: (xi64) <- () + full_47 = paddle._C_ops.full( + [], float("6"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_20 = [slice_44, full_29, full_30, full_47, full_31] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_19 = paddle._C_ops.stack(combine_20, 0) + del combine_20 + + # pd_op.reshape: (-1x49x3x6x32xf32) <- (-1x49x576xf32, 5xi64) + reshape_225 = paddle._C_ops.reshape(add_15, stack_19) + del stack_19 + + # pd_op.transpose: (3x-1x6x49x32xf32) <- (-1x49x3x6x32xf32) + transpose_14 = paddle._C_ops.transpose(reshape_225, [2, 0, 3, 1, 4]) + del reshape_225 + + # pd_op.slice: (-1x6x49x32xf32) <- (3x-1x6x49x32xf32, 1xi64, 1xi64) + slice_45 = paddle._C_ops.slice( + transpose_14, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x6x49x32xf32) <- (3x-1x6x49x32xf32, 1xi64, 1xi64) + slice_46 = paddle._C_ops.slice( + transpose_14, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x6x49x32xf32) <- (3x-1x6x49x32xf32, 1xi64, 1xi64) + slice_2 = paddle._C_ops.slice( + transpose_14, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x6x49x32xf32) <- (-1x6x49x32xf32, 1xf32) + scale_2 = paddle._C_ops.scale(slice_45, full_0, float("0"), True) + del slice_45 + + # pd_op.transpose: (-1x6x32x49xf32) <- (-1x6x49x32xf32) + transpose_15 = paddle._C_ops.transpose(slice_46, [0, 1, 3, 2]) + del slice_46 + + # pd_op.matmul: (-1x6x49x49xf32) <- (-1x6x49x32xf32, -1x6x32x49xf32) + matmul_12 = paddle._C_ops.matmul(scale_2, transpose_15, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_23 = paddle._C_ops.reshape(data_5, full_int_array_12) + del data_5 + + # pd_op.index_select: (2401x6xf32) <- (169x6xf32, 2401xi64) + index_select_2 = paddle._C_ops.index_select(data_6, reshape_23, 0) + del data_6 + + # pd_op.reshape: (49x49x6xf32) <- (2401x6xf32, 3xi64) + reshape_226 = paddle._C_ops.reshape(index_select_2, full_int_array_13) + + # pd_op.transpose: (6x49x49xf32) <- (49x49x6xf32) + transpose_16 = paddle._C_ops.transpose(reshape_226, [2, 0, 1]) + del reshape_226 + + # pd_op.unsqueeze: (1x6x49x49xf32) <- (6x49x49xf32, 1xi64) + unsqueeze_3 = paddle._C_ops.unsqueeze(transpose_16, full_int_array_7) + + # pd_op.add: (-1x6x49x49xf32) <- (-1x6x49x49xf32, 1x6x49x49xf32) + add_173 = paddle._C_ops.add(matmul_12, unsqueeze_3) + + # pd_op.softmax: (-1x6x49x49xf32) <- (-1x6x49x49xf32) + softmax_2 = paddle._C_ops.softmax(add_173, -1) + del add_173 + + # pd_op.matmul: (-1x6x49x32xf32) <- (-1x6x49x49xf32, -1x6x49x32xf32) + matmul_126 = paddle._C_ops.matmul(softmax_2, slice_2, False, False) + + # pd_op.transpose: (-1x49x6x32xf32) <- (-1x6x49x32xf32) + transpose_17 = paddle._C_ops.transpose(matmul_126, [0, 2, 1, 3]) + del matmul_126 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_21 = [slice_44, full_29, full_45] + del slice_44 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_20 = paddle._C_ops.stack(combine_21, 0) + del combine_21 + + # pd_op.reshape: (-1x49x192xf32) <- (-1x49x6x32xf32, 3xi64) + reshape_24 = paddle._C_ops.reshape(transpose_17, stack_20) + del stack_20 + + # pd_op.matmul: (-1x49x192xf32) <- (-1x49x192xf32, 192x192xf32) + matmul_13 = paddle._C_ops.matmul(reshape_24, parameter_269, False, False) + del parameter_269 + + # pd_op.add: (-1x49x192xf32) <- (-1x49x192xf32, 192xf32) + add_16 = paddle._C_ops.add(matmul_13, parameter_268) + del parameter_268 + + # pd_op.reshape: (-1x7x7x192xf32) <- (-1x49x192xf32, 4xi64) + reshape_25 = paddle._C_ops.reshape(add_16, full_int_array_33) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_35 = [-1, 4, 4, 7, 7, 192] + + # pd_op.reshape: (-1x4x4x7x7x192xf32) <- (-1x7x7x192xf32, 6xi64) + reshape_227 = paddle._C_ops.reshape(reshape_25, full_int_array_35) + + # pd_op.transpose: (-1x4x7x4x7x192xf32) <- (-1x4x4x7x7x192xf32) + transpose_18 = paddle._C_ops.transpose(reshape_227, [0, 1, 3, 2, 4, 5]) + del reshape_227 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_36 = [-1, 28, 28, 192] + + # pd_op.reshape: (-1x28x28x192xf32) <- (-1x4x7x4x7x192xf32, 4xi64) + reshape_26 = paddle._C_ops.reshape(transpose_18, full_int_array_36) + + # pd_op.full: (xi64) <- () + full_48 = paddle._C_ops.full( + [], float("784"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_22 = [slice_41, full_48, full_45] + del slice_41 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_21 = paddle._C_ops.stack(combine_22, 0) + del combine_22 + + # pd_op.reshape: (-1x784x192xf32) <- (-1x28x28x192xf32, 3xi64) + reshape_27 = paddle._C_ops.reshape(reshape_26, stack_21) + del stack_21 + + # pd_op.full: (xf32) <- () + full_3 = paddle._C_ops.full( + [], + float("0.973913"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_31 = full_3 + + # pd_op.shape64: (3xi64) <- (-1x784x192xf32) + shape64_17 = paddle._C_ops.shape64(reshape_27) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_47 = paddle._C_ops.slice( + shape64_17, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_17 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_23 = [slice_47, full_39, full_39] + del slice_47 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_22 = paddle._C_ops.stack(combine_23, 0) + del combine_23 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_2 = paddle._C_ops.uniform( + stack_22, + paddle.float32, + full_40, + full_41, + 0, + paddle.framework._current_expected_place(), + ) + del stack_22 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_174 = paddle._C_ops.add(full_3, uniform_2) + del uniform_2 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_2 = paddle._C_ops.floor(add_174) + del add_174 + + # pd_op.divide: (-1x784x192xf32) <- (-1x784x192xf32, xf32) + divide_2 = paddle._C_ops.divide(reshape_27, full_3) + + # pd_op.multiply: (-1x784x192xf32) <- (-1x784x192xf32, -1x1x1xf32) + multiply_2 = paddle._C_ops.multiply(divide_2, floor_2) + + # pd_op.add: (-1x784x192xf32) <- (-1x-1x192xf32, -1x784x192xf32) + add_17 = paddle._C_ops.add(matmul_10, multiply_2) + + # pd_op.layer_norm: (-1x784x192xf32, -1x784xf32, -1x784xf32) <- (-1x784x192xf32, 192xf32, 192xf32) + layer_norm_21, layer_norm_22, layer_norm_23 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_17, parameter_267, parameter_266, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_266, parameter_267 + + # pd_op.matmul: (-1x784x768xf32) <- (-1x784x192xf32, 192x768xf32) + matmul_14 = paddle._C_ops.matmul(layer_norm_21, parameter_265, False, False) + del parameter_265 + + # pd_op.add: (-1x784x768xf32) <- (-1x784x768xf32, 768xf32) + add_18 = paddle._C_ops.add(matmul_14, parameter_264) + del parameter_264 + + # pd_op.gelu: (-1x784x768xf32) <- (-1x784x768xf32) + gelu_2 = paddle._C_ops.gelu(add_18, False) + + # pd_op.matmul: (-1x784x192xf32) <- (-1x784x768xf32, 768x192xf32) + matmul_15 = paddle._C_ops.matmul(gelu_2, parameter_263, False, False) + del parameter_263 + + # pd_op.add: (-1x784x192xf32) <- (-1x784x192xf32, 192xf32) + add_19 = paddle._C_ops.add(matmul_15, parameter_262) + del parameter_262 + + # pd_op.shape64: (3xi64) <- (-1x784x192xf32) + shape64_18 = paddle._C_ops.shape64(add_19) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_48 = paddle._C_ops.slice( + shape64_18, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_18 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_24 = [slice_48, full_39, full_39] + del slice_48 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_23 = paddle._C_ops.stack(combine_24, 0) + del combine_24 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_3 = paddle._C_ops.uniform( + stack_23, + paddle.float32, + full_40, + full_41, + 0, + paddle.framework._current_expected_place(), + ) + del stack_23 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_175 = paddle._C_ops.add(full_3, uniform_3) + del uniform_3 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_3 = paddle._C_ops.floor(add_175) + del add_175 + + # pd_op.divide: (-1x784x192xf32) <- (-1x784x192xf32, xf32) + divide_3 = paddle._C_ops.divide(add_19, full_3) + + # pd_op.multiply: (-1x784x192xf32) <- (-1x784x192xf32, -1x1x1xf32) + multiply_3 = paddle._C_ops.multiply(divide_3, floor_3) + + # pd_op.add: (-1x784x192xf32) <- (-1x784x192xf32, -1x784x192xf32) + add_20 = paddle._C_ops.add(add_17, multiply_3) + + # pd_op.shape64: (3xi64) <- (-1x784x192xf32) + shape64_19 = paddle._C_ops.shape64(add_20) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_49 = paddle._C_ops.slice( + shape64_19, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_19 + + # pd_op.layer_norm: (-1x784x192xf32, -1x784xf32, -1x784xf32) <- (-1x784x192xf32, 192xf32, 192xf32) + layer_norm_24, layer_norm_25, layer_norm_26 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_20, parameter_261, parameter_260, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_260, parameter_261 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_25 = [slice_49, full_44, full_44, full_45] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_24 = paddle._C_ops.stack(combine_25, 0) + del combine_25 + + # pd_op.reshape: (-1x28x28x192xf32) <- (-1x784x192xf32, 4xi64) + reshape_28 = paddle._C_ops.reshape(layer_norm_24, stack_24) + del stack_24 + + # pd_op.shape64: (4xi64) <- (-1x28x28x192xf32) + shape64_20 = paddle._C_ops.shape64(reshape_28) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_50 = paddle._C_ops.slice( + shape64_20, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_20 + + # pd_op.roll: (-1x28x28x192xf32) <- (-1x28x28x192xf32, 2xi64) + roll_2 = paddle._C_ops.roll(reshape_28, full_int_array_2, [1, 2]) + + # pd_op.shape64: (4xi64) <- (-1x28x28x192xf32) + shape64_21 = paddle._C_ops.shape64(roll_2) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_51 = paddle._C_ops.slice( + shape64_21, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_21 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_26 = [slice_51, full_46, full_28, full_46, full_28, full_45] + del slice_51 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_25 = paddle._C_ops.stack(combine_26, 0) + del combine_26 + + # pd_op.reshape: (-1x4x7x4x7x192xf32) <- (-1x28x28x192xf32, 6xi64) + reshape_228 = paddle._C_ops.reshape(roll_2, stack_25) + del stack_25 + + # pd_op.transpose: (-1x4x4x7x7x192xf32) <- (-1x4x7x4x7x192xf32) + transpose_19 = paddle._C_ops.transpose(reshape_228, [0, 1, 3, 2, 4, 5]) + del reshape_228 + + # pd_op.reshape: (-1x7x7x192xf32) <- (-1x4x4x7x7x192xf32, 4xi64) + reshape_29 = paddle._C_ops.reshape(transpose_19, full_int_array_33) + + # pd_op.reshape: (-1x49x192xf32) <- (-1x7x7x192xf32, 3xi64) + reshape_30 = paddle._C_ops.reshape(reshape_29, full_int_array_34) + del full_int_array_34 + + # pd_op.full: (1x28x28x1xf32) <- () + full_49 = paddle._C_ops.full( + [1, 28, 28, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__20 = paddle._C_ops.set_value_( + full_49, + full_int_array_16, + full_int_array_17, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_49 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__21 = paddle._C_ops.set_value_( + set_value__20, + full_int_array_19, + full_int_array_20, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__20 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__22 = paddle._C_ops.set_value_( + set_value__21, + full_int_array_21, + full_int_array_22, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__21 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__23 = paddle._C_ops.set_value_( + set_value__22, + full_int_array_23, + full_int_array_24, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__22 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__24 = paddle._C_ops.set_value_( + set_value__23, + full_int_array_17, + full_int_array_2, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__23 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__25 = paddle._C_ops.set_value_( + set_value__24, + full_int_array_20, + full_int_array_25, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__24 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__26 = paddle._C_ops.set_value_( + set_value__25, + full_int_array_26, + full_int_array_27, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__25 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__27 = paddle._C_ops.set_value_( + set_value__26, + full_int_array_24, + full_int_array_28, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__26 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__1 = paddle._C_ops.set_value_( + set_value__27, + full_int_array_2, + full_int_array_29, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__27 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_37 = [1, 4, 7, 4, 7, 1] + + # pd_op.reshape: (1x4x7x4x7x1xf32) <- (1x28x28x1xf32, 6xi64) + reshape_229 = paddle._C_ops.reshape(set_value__1, full_int_array_37) + del full_int_array_37 + + # pd_op.transpose: (1x4x4x7x7x1xf32) <- (1x4x7x4x7x1xf32) + transpose_147 = paddle._C_ops.transpose(reshape_229, [0, 1, 3, 2, 4, 5]) + del reshape_229 + + # pd_op.reshape: (16x7x7x1xf32) <- (1x4x4x7x7x1xf32, 4xi64) + reshape_230 = paddle._C_ops.reshape(transpose_147, full_int_array_31) + del transpose_147 + + # pd_op.reshape: (16x49xf32) <- (16x7x7x1xf32, 2xi64) + reshape_231 = paddle._C_ops.reshape(reshape_230, full_int_array_32) + del reshape_230 + + # pd_op.unsqueeze: (16x1x49xf32) <- (16x49xf32, 1xi64) + unsqueeze_40 = paddle._C_ops.unsqueeze(reshape_231, full_int_array_8) + + # pd_op.unsqueeze: (16x49x1xf32) <- (16x49xf32, 1xi64) + unsqueeze_41 = paddle._C_ops.unsqueeze(reshape_231, full_int_array_0) + del reshape_231 + + # pd_op.subtract: (16x49x49xf32) <- (16x1x49xf32, 16x49x1xf32) + subtract_1 = paddle._C_ops.subtract(unsqueeze_40, unsqueeze_41) + del unsqueeze_40, unsqueeze_41 + + # pd_op.not_equal: (16x49x49xb) <- (16x49x49xf32, xf32) + not_equal_1 = paddle._C_ops.not_equal(subtract_1, full_34) + + # pd_op.full: (16x49x49xf32) <- () + full_50 = paddle._C_ops.full( + [16, 49, 49], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (16x49x49xf32) <- (16x49x49xb, 16x49x49xf32, 16x49x49xf32) + where_2 = paddle._C_ops.where(not_equal_1, full_50, subtract_1) + del full_50, not_equal_1, subtract_1 + + # pd_op.equal: (16x49x49xb) <- (16x49x49xf32, xf32) + equal_1 = paddle._C_ops.equal(where_2, full_34) + + # pd_op.full: (16x49x49xf32) <- () + full_51 = paddle._C_ops.full( + [16, 49, 49], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (16x49x49xf32) <- (16x49x49xb, 16x49x49xf32, 16x49x49xf32) + where_3 = paddle._C_ops.where(equal_1, full_51, where_2) + del equal_1, full_51, where_2 + + # pd_op.shape64: (3xi64) <- (-1x49x192xf32) + shape64_22 = paddle._C_ops.shape64(reshape_30) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_52 = paddle._C_ops.slice( + shape64_22, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_22 + + # pd_op.matmul: (-1x49x576xf32) <- (-1x49x192xf32, 192x576xf32) + matmul_16 = paddle._C_ops.matmul(reshape_30, parameter_259, False, False) + del parameter_259 + + # pd_op.add: (-1x49x576xf32) <- (-1x49x576xf32, 576xf32) + add_21 = paddle._C_ops.add(matmul_16, parameter_258) + del parameter_258 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_27 = [slice_52, full_29, full_30, full_47, full_31] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_26 = paddle._C_ops.stack(combine_27, 0) + del combine_27 + + # pd_op.reshape: (-1x49x3x6x32xf32) <- (-1x49x576xf32, 5xi64) + reshape_232 = paddle._C_ops.reshape(add_21, stack_26) + del stack_26 + + # pd_op.transpose: (3x-1x6x49x32xf32) <- (-1x49x3x6x32xf32) + transpose_20 = paddle._C_ops.transpose(reshape_232, [2, 0, 3, 1, 4]) + del reshape_232 + + # pd_op.slice: (-1x6x49x32xf32) <- (3x-1x6x49x32xf32, 1xi64, 1xi64) + slice_53 = paddle._C_ops.slice( + transpose_20, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x6x49x32xf32) <- (3x-1x6x49x32xf32, 1xi64, 1xi64) + slice_54 = paddle._C_ops.slice( + transpose_20, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x6x49x32xf32) <- (3x-1x6x49x32xf32, 1xi64, 1xi64) + slice_3 = paddle._C_ops.slice( + transpose_20, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x6x49x32xf32) <- (-1x6x49x32xf32, 1xf32) + scale_3 = paddle._C_ops.scale(slice_53, full_0, float("0"), True) + del slice_53 + + # pd_op.transpose: (-1x6x32x49xf32) <- (-1x6x49x32xf32) + transpose_21 = paddle._C_ops.transpose(slice_54, [0, 1, 3, 2]) + del slice_54 + + # pd_op.matmul: (-1x6x49x49xf32) <- (-1x6x49x32xf32, -1x6x32x49xf32) + matmul_17 = paddle._C_ops.matmul(scale_3, transpose_21, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_31 = paddle._C_ops.reshape(data_7, full_int_array_12) + del data_7 + + # pd_op.index_select: (2401x6xf32) <- (169x6xf32, 2401xi64) + index_select_3 = paddle._C_ops.index_select(data_8, reshape_31, 0) + del data_8 + + # pd_op.reshape: (49x49x6xf32) <- (2401x6xf32, 3xi64) + reshape_233 = paddle._C_ops.reshape(index_select_3, full_int_array_13) + + # pd_op.transpose: (6x49x49xf32) <- (49x49x6xf32) + transpose_22 = paddle._C_ops.transpose(reshape_233, [2, 0, 1]) + del reshape_233 + + # pd_op.unsqueeze: (1x6x49x49xf32) <- (6x49x49xf32, 1xi64) + unsqueeze_4 = paddle._C_ops.unsqueeze(transpose_22, full_int_array_7) + + # pd_op.add: (-1x6x49x49xf32) <- (-1x6x49x49xf32, 1x6x49x49xf32) + add_22 = paddle._C_ops.add(matmul_17, unsqueeze_4) + + # pd_op.full: (xi64) <- () + full_52 = paddle._C_ops.full( + [], float("16"), paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_1 = paddle._C_ops.floor_divide(slice_52, full_52) + del full_52 + + # pd_op.full: (xi64) <- () + full_53 = paddle._C_ops.full( + [], float("16"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_28 = [floor_divide_1, full_53, full_47, full_29, full_29] + del floor_divide_1, full_53 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_27 = paddle._C_ops.stack(combine_28, 0) + del combine_28 + + # pd_op.reshape: (-1x16x6x49x49xf32) <- (-1x6x49x49xf32, 5xi64) + reshape_32 = paddle._C_ops.reshape(add_22, stack_27) + del stack_27 + + # pd_op.unsqueeze: (16x1x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_42 = paddle._C_ops.unsqueeze(where_3, full_int_array_8) + del where_3 + + # pd_op.unsqueeze: (1x16x1x49x49xf32) <- (16x1x49x49xf32, 1xi64) + unsqueeze_5 = paddle._C_ops.unsqueeze(unsqueeze_42, full_int_array_7) + del unsqueeze_42 + + # pd_op.add: (-1x16x6x49x49xf32) <- (-1x16x6x49x49xf32, 1x16x1x49x49xf32) + add_23 = paddle._C_ops.add(reshape_32, unsqueeze_5) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_29 = [slice_52, full_47, full_29, full_29] + del full_47 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_28 = paddle._C_ops.stack(combine_29, 0) + del combine_29 + + # pd_op.reshape: (-1x6x49x49xf32) <- (-1x16x6x49x49xf32, 4xi64) + reshape_234 = paddle._C_ops.reshape(add_23, stack_28) + del stack_28 + + # pd_op.softmax: (-1x6x49x49xf32) <- (-1x6x49x49xf32) + softmax_3 = paddle._C_ops.softmax(reshape_234, -1) + del reshape_234 + + # pd_op.matmul: (-1x6x49x32xf32) <- (-1x6x49x49xf32, -1x6x49x32xf32) + matmul_127 = paddle._C_ops.matmul(softmax_3, slice_3, False, False) + + # pd_op.transpose: (-1x49x6x32xf32) <- (-1x6x49x32xf32) + transpose_23 = paddle._C_ops.transpose(matmul_127, [0, 2, 1, 3]) + del matmul_127 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_30 = [slice_52, full_29, full_45] + del slice_52 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_29 = paddle._C_ops.stack(combine_30, 0) + del combine_30 + + # pd_op.reshape: (-1x49x192xf32) <- (-1x49x6x32xf32, 3xi64) + reshape_33 = paddle._C_ops.reshape(transpose_23, stack_29) + del stack_29 + + # pd_op.matmul: (-1x49x192xf32) <- (-1x49x192xf32, 192x192xf32) + matmul_18 = paddle._C_ops.matmul(reshape_33, parameter_257, False, False) + del parameter_257 + + # pd_op.add: (-1x49x192xf32) <- (-1x49x192xf32, 192xf32) + add_24 = paddle._C_ops.add(matmul_18, parameter_256) + del parameter_256 + + # pd_op.reshape: (-1x7x7x192xf32) <- (-1x49x192xf32, 4xi64) + reshape_34 = paddle._C_ops.reshape(add_24, full_int_array_33) + del full_int_array_33 + + # pd_op.reshape: (-1x4x4x7x7x192xf32) <- (-1x7x7x192xf32, 6xi64) + reshape_235 = paddle._C_ops.reshape(reshape_34, full_int_array_35) + del full_int_array_35 + + # pd_op.transpose: (-1x4x7x4x7x192xf32) <- (-1x4x4x7x7x192xf32) + transpose_24 = paddle._C_ops.transpose(reshape_235, [0, 1, 3, 2, 4, 5]) + del reshape_235 + + # pd_op.reshape: (-1x28x28x192xf32) <- (-1x4x7x4x7x192xf32, 4xi64) + reshape_35 = paddle._C_ops.reshape(transpose_24, full_int_array_36) + del full_int_array_36 + + # pd_op.roll: (-1x28x28x192xf32) <- (-1x28x28x192xf32, 2xi64) + roll_3 = paddle._C_ops.roll(reshape_35, full_int_array_3, [1, 2]) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_31 = [slice_49, full_48, full_45] + del full_48, slice_49 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_30 = paddle._C_ops.stack(combine_31, 0) + del combine_31 + + # pd_op.reshape: (-1x784x192xf32) <- (-1x28x28x192xf32, 3xi64) + reshape_36 = paddle._C_ops.reshape(roll_3, stack_30) + del stack_30 + + # pd_op.full: (xf32) <- () + full_4 = paddle._C_ops.full( + [], + float("0.96087"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_42 = full_4 + + # pd_op.shape64: (3xi64) <- (-1x784x192xf32) + shape64_23 = paddle._C_ops.shape64(reshape_36) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_55 = paddle._C_ops.slice( + shape64_23, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_23 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_32 = [slice_55, full_39, full_39] + del slice_55 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_31 = paddle._C_ops.stack(combine_32, 0) + del combine_32 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_4 = paddle._C_ops.uniform( + stack_31, + paddle.float32, + full_40, + full_41, + 0, + paddle.framework._current_expected_place(), + ) + del stack_31 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_176 = paddle._C_ops.add(full_4, uniform_4) + del uniform_4 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_4 = paddle._C_ops.floor(add_176) + del add_176 + + # pd_op.divide: (-1x784x192xf32) <- (-1x784x192xf32, xf32) + divide_4 = paddle._C_ops.divide(reshape_36, full_4) + + # pd_op.multiply: (-1x784x192xf32) <- (-1x784x192xf32, -1x1x1xf32) + multiply_4 = paddle._C_ops.multiply(divide_4, floor_4) + + # pd_op.add: (-1x784x192xf32) <- (-1x784x192xf32, -1x784x192xf32) + add_25 = paddle._C_ops.add(add_20, multiply_4) + + # pd_op.layer_norm: (-1x784x192xf32, -1x784xf32, -1x784xf32) <- (-1x784x192xf32, 192xf32, 192xf32) + layer_norm_27, layer_norm_28, layer_norm_29 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_25, parameter_255, parameter_254, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_254, parameter_255 + + # pd_op.matmul: (-1x784x768xf32) <- (-1x784x192xf32, 192x768xf32) + matmul_19 = paddle._C_ops.matmul(layer_norm_27, parameter_253, False, False) + del parameter_253 + + # pd_op.add: (-1x784x768xf32) <- (-1x784x768xf32, 768xf32) + add_26 = paddle._C_ops.add(matmul_19, parameter_252) + del parameter_252 + + # pd_op.gelu: (-1x784x768xf32) <- (-1x784x768xf32) + gelu_3 = paddle._C_ops.gelu(add_26, False) + + # pd_op.matmul: (-1x784x192xf32) <- (-1x784x768xf32, 768x192xf32) + matmul_20 = paddle._C_ops.matmul(gelu_3, parameter_251, False, False) + del parameter_251 + + # pd_op.add: (-1x784x192xf32) <- (-1x784x192xf32, 192xf32) + add_27 = paddle._C_ops.add(matmul_20, parameter_250) + del parameter_250 + + # pd_op.shape64: (3xi64) <- (-1x784x192xf32) + shape64_24 = paddle._C_ops.shape64(add_27) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_56 = paddle._C_ops.slice( + shape64_24, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_24 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_33 = [slice_56, full_39, full_39] + del slice_56 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_32 = paddle._C_ops.stack(combine_33, 0) + del combine_33 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_5 = paddle._C_ops.uniform( + stack_32, + paddle.float32, + full_40, + full_41, + 0, + paddle.framework._current_expected_place(), + ) + del stack_32 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_177 = paddle._C_ops.add(full_4, uniform_5) + del uniform_5 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_5 = paddle._C_ops.floor(add_177) + del add_177 + + # pd_op.divide: (-1x784x192xf32) <- (-1x784x192xf32, xf32) + divide_5 = paddle._C_ops.divide(add_27, full_4) + + # pd_op.multiply: (-1x784x192xf32) <- (-1x784x192xf32, -1x1x1xf32) + multiply_5 = paddle._C_ops.multiply(divide_5, floor_5) + + # pd_op.add: (-1x784x192xf32) <- (-1x784x192xf32, -1x784x192xf32) + add_28 = paddle._C_ops.add(add_25, multiply_5) + + # pd_op.shape64: (3xi64) <- (-1x784x192xf32) + shape64_25 = paddle._C_ops.shape64(add_28) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_57 = paddle._C_ops.slice( + shape64_25, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_25 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_34 = [slice_57, full_44, full_44, full_45] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_33 = paddle._C_ops.stack(combine_34, 0) + del combine_34 + + # pd_op.reshape: (-1x28x28x192xf32) <- (-1x784x192xf32, 4xi64) + reshape_37 = paddle._C_ops.reshape(add_28, stack_33) + del stack_33 + + # pd_op.strided_slice: (-1x14x14x192xf32) <- (-1x28x28x192xf32, 2xi64, 2xi64, 2xi64) + strided_slice_4 = paddle._C_ops.strided_slice( + reshape_37, [1, 2], full_int_array_16, full_int_array_29, full_int_array_4 + ) + + # pd_op.strided_slice: (-1x14x14x192xf32) <- (-1x28x28x192xf32, 2xi64, 2xi64, 2xi64) + strided_slice_5 = paddle._C_ops.strided_slice( + reshape_37, [1, 2], full_int_array_5, full_int_array_29, full_int_array_4 + ) + + # pd_op.strided_slice: (-1x14x14x192xf32) <- (-1x28x28x192xf32, 2xi64, 2xi64, 2xi64) + strided_slice_6 = paddle._C_ops.strided_slice( + reshape_37, [1, 2], full_int_array_6, full_int_array_29, full_int_array_4 + ) + + # pd_op.strided_slice: (-1x14x14x192xf32) <- (-1x28x28x192xf32, 2xi64, 2xi64, 2xi64) + strided_slice_7 = paddle._C_ops.strided_slice( + reshape_37, [1, 2], full_int_array_18, full_int_array_29, full_int_array_4 + ) + + # pd_op.shape64: (4xi64) <- (-1x28x28x192xf32) + shape64_26 = paddle._C_ops.shape64(reshape_37) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_58 = paddle._C_ops.slice( + shape64_26, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_26 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_35 = [slice_58, full_44, full_44, full_45] + del full_44, full_45, slice_58 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_34 = paddle._C_ops.stack(combine_35, 0) + del combine_35 + + # pd_op.reshape: (-1x28x28x192xf32) <- (-1x28x28x192xf32, 4xi64) + reshape_236 = paddle._C_ops.reshape(reshape_37, stack_34) + del stack_34 + + # builtin.combine: ([-1x14x14x192xf32, -1x14x14x192xf32, -1x14x14x192xf32, -1x14x14x192xf32]) <- (-1x14x14x192xf32, -1x14x14x192xf32, -1x14x14x192xf32, -1x14x14x192xf32) + combine_36 = [ + strided_slice_4, + strided_slice_5, + strided_slice_6, + strided_slice_7, + ] + + # pd_op.concat: (-1x14x14x768xf32) <- ([-1x14x14x192xf32, -1x14x14x192xf32, -1x14x14x192xf32, -1x14x14x192xf32], 1xi32) + concat_1 = paddle._C_ops.concat(combine_36, full_2) + del combine_36 + + # pd_op.full: (xi64) <- () + full_54 = paddle._C_ops.full( + [], float("768"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_37 = [slice_57, full_42, full_54] + del slice_57 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_35 = paddle._C_ops.stack(combine_37, 0) + del combine_37 + + # pd_op.reshape: (-1x-1x768xf32) <- (-1x14x14x768xf32, 3xi64) + reshape_38 = paddle._C_ops.reshape(concat_1, stack_35) + del stack_35 + + # pd_op.layer_norm: (-1x-1x768xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x768xf32, 768xf32, 768xf32) + layer_norm_30, layer_norm_31, layer_norm_32 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + reshape_38, parameter_249, parameter_248, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_248, parameter_249 + + # pd_op.matmul: (-1x-1x384xf32) <- (-1x-1x768xf32, 768x384xf32) + matmul_21 = paddle._C_ops.matmul(layer_norm_30, parameter_247, False, False) + del parameter_247 + + # pd_op.shape64: (3xi64) <- (-1x-1x384xf32) + shape64_27 = paddle._C_ops.shape64(matmul_21) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_59 = paddle._C_ops.slice( + shape64_27, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_27 + + # pd_op.shape64: (3xi64) <- (-1x-1x384xf32) + shape64_28 = paddle._C_ops.shape64(matmul_21) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_60 = paddle._C_ops.slice( + shape64_28, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + del shape64_28 + + # pd_op.layer_norm: (-1x-1x384xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x384xf32, 384xf32, 384xf32) + layer_norm_33, layer_norm_34, layer_norm_35 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + matmul_21, parameter_246, parameter_245, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_245, parameter_246 + + # pd_op.full: (xi64) <- () + full_55 = paddle._C_ops.full( + [], float("14"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_38 = [slice_59, full_55, full_55, full_43] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_36 = paddle._C_ops.stack(combine_38, 0) + del combine_38 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x-1x384xf32, 4xi64) + reshape_39 = paddle._C_ops.reshape(layer_norm_33, stack_36) + del stack_36 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_29 = paddle._C_ops.shape64(reshape_39) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_61 = paddle._C_ops.slice( + shape64_29, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_29 + + # pd_op.full: (xi64) <- () + full_56 = paddle._C_ops.full( + [], float("2"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_39 = [slice_61, full_56, full_28, full_56, full_28, full_43] + del slice_61 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_37 = paddle._C_ops.stack(combine_39, 0) + del combine_39 + + # pd_op.reshape: (-1x2x7x2x7x384xf32) <- (-1x14x14x384xf32, 6xi64) + reshape_237 = paddle._C_ops.reshape(reshape_39, stack_37) + del stack_37 + + # pd_op.transpose: (-1x2x2x7x7x384xf32) <- (-1x2x7x2x7x384xf32) + transpose_25 = paddle._C_ops.transpose(reshape_237, [0, 1, 3, 2, 4, 5]) + del reshape_237 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_38 = [-1, 7, 7, 384] + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x2x2x7x7x384xf32, 4xi64) + reshape_40 = paddle._C_ops.reshape(transpose_25, full_int_array_38) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_39 = [-1, 49, 384] + + # pd_op.reshape: (-1x49x384xf32) <- (-1x7x7x384xf32, 3xi64) + reshape_41 = paddle._C_ops.reshape(reshape_40, full_int_array_39) + + # pd_op.shape64: (3xi64) <- (-1x49x384xf32) + shape64_30 = paddle._C_ops.shape64(reshape_41) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_62 = paddle._C_ops.slice( + shape64_30, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_30 + + # pd_op.matmul: (-1x49x1152xf32) <- (-1x49x384xf32, 384x1152xf32) + matmul_22 = paddle._C_ops.matmul(reshape_41, parameter_244, False, False) + del parameter_244 + + # pd_op.add: (-1x49x1152xf32) <- (-1x49x1152xf32, 1152xf32) + add_29 = paddle._C_ops.add(matmul_22, parameter_243) + del parameter_243 + + # pd_op.full: (xi64) <- () + full_57 = paddle._C_ops.full( + [], float("12"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_40 = [slice_62, full_29, full_30, full_57, full_31] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_38 = paddle._C_ops.stack(combine_40, 0) + del combine_40 + + # pd_op.reshape: (-1x49x3x12x32xf32) <- (-1x49x1152xf32, 5xi64) + reshape_238 = paddle._C_ops.reshape(add_29, stack_38) + del stack_38 + + # pd_op.transpose: (3x-1x12x49x32xf32) <- (-1x49x3x12x32xf32) + transpose_26 = paddle._C_ops.transpose(reshape_238, [2, 0, 3, 1, 4]) + del reshape_238 + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_63 = paddle._C_ops.slice( + transpose_26, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_64 = paddle._C_ops.slice( + transpose_26, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_4 = paddle._C_ops.slice( + transpose_26, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x12x49x32xf32) <- (-1x12x49x32xf32, 1xf32) + scale_4 = paddle._C_ops.scale(slice_63, full_0, float("0"), True) + del slice_63 + + # pd_op.transpose: (-1x12x32x49xf32) <- (-1x12x49x32xf32) + transpose_27 = paddle._C_ops.transpose(slice_64, [0, 1, 3, 2]) + del slice_64 + + # pd_op.matmul: (-1x12x49x49xf32) <- (-1x12x49x32xf32, -1x12x32x49xf32) + matmul_23 = paddle._C_ops.matmul(scale_4, transpose_27, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_42 = paddle._C_ops.reshape(data_9, full_int_array_12) + del data_9 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_4 = paddle._C_ops.index_select(data_10, reshape_42, 0) + del data_10 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_239 = paddle._C_ops.reshape(index_select_4, full_int_array_13) + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_28 = paddle._C_ops.transpose(reshape_239, [2, 0, 1]) + del reshape_239 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_6 = paddle._C_ops.unsqueeze(transpose_28, full_int_array_7) + + # pd_op.add: (-1x12x49x49xf32) <- (-1x12x49x49xf32, 1x12x49x49xf32) + add_178 = paddle._C_ops.add(matmul_23, unsqueeze_6) + + # pd_op.softmax: (-1x12x49x49xf32) <- (-1x12x49x49xf32) + softmax_4 = paddle._C_ops.softmax(add_178, -1) + del add_178 + + # pd_op.matmul: (-1x12x49x32xf32) <- (-1x12x49x49xf32, -1x12x49x32xf32) + matmul_128 = paddle._C_ops.matmul(softmax_4, slice_4, False, False) + + # pd_op.transpose: (-1x49x12x32xf32) <- (-1x12x49x32xf32) + transpose_29 = paddle._C_ops.transpose(matmul_128, [0, 2, 1, 3]) + del matmul_128 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_41 = [slice_62, full_29, full_43] + del slice_62 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_39 = paddle._C_ops.stack(combine_41, 0) + del combine_41 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x49x12x32xf32, 3xi64) + reshape_43 = paddle._C_ops.reshape(transpose_29, stack_39) + del stack_39 + + # pd_op.matmul: (-1x49x384xf32) <- (-1x49x384xf32, 384x384xf32) + matmul_24 = paddle._C_ops.matmul(reshape_43, parameter_242, False, False) + del parameter_242 + + # pd_op.add: (-1x49x384xf32) <- (-1x49x384xf32, 384xf32) + add_30 = paddle._C_ops.add(matmul_24, parameter_241) + del parameter_241 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x49x384xf32, 4xi64) + reshape_44 = paddle._C_ops.reshape(add_30, full_int_array_38) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_40 = [-1, 2, 2, 7, 7, 384] + + # pd_op.reshape: (-1x2x2x7x7x384xf32) <- (-1x7x7x384xf32, 6xi64) + reshape_240 = paddle._C_ops.reshape(reshape_44, full_int_array_40) + + # pd_op.transpose: (-1x2x7x2x7x384xf32) <- (-1x2x2x7x7x384xf32) + transpose_30 = paddle._C_ops.transpose(reshape_240, [0, 1, 3, 2, 4, 5]) + del reshape_240 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_41 = [-1, 14, 14, 384] + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x2x7x2x7x384xf32, 4xi64) + reshape_45 = paddle._C_ops.reshape(transpose_30, full_int_array_41) + + # pd_op.full: (xi64) <- () + full_58 = paddle._C_ops.full( + [], float("196"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_42 = [slice_59, full_58, full_43] + del slice_59 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_40 = paddle._C_ops.stack(combine_42, 0) + del combine_42 + + # pd_op.reshape: (-1x196x384xf32) <- (-1x14x14x384xf32, 3xi64) + reshape_46 = paddle._C_ops.reshape(reshape_45, stack_40) + del stack_40 + + # pd_op.full: (xf32) <- () + full_5 = paddle._C_ops.full( + [], + float("0.947826"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_64 = full_5 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_31 = paddle._C_ops.shape64(reshape_46) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_65 = paddle._C_ops.slice( + shape64_31, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_31 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_43 = [slice_65, full_39, full_39] + del slice_65 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_41 = paddle._C_ops.stack(combine_43, 0) + del combine_43 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_6 = paddle._C_ops.uniform( + stack_41, + paddle.float32, + full_40, + full_41, + 0, + paddle.framework._current_expected_place(), + ) + del stack_41 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_179 = paddle._C_ops.add(full_5, uniform_6) + del uniform_6 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_6 = paddle._C_ops.floor(add_179) + del add_179 + + # pd_op.divide: (-1x196x384xf32) <- (-1x196x384xf32, xf32) + divide_6 = paddle._C_ops.divide(reshape_46, full_5) + + # pd_op.multiply: (-1x196x384xf32) <- (-1x196x384xf32, -1x1x1xf32) + multiply_6 = paddle._C_ops.multiply(divide_6, floor_6) + + # pd_op.add: (-1x196x384xf32) <- (-1x-1x384xf32, -1x196x384xf32) + add_31 = paddle._C_ops.add(matmul_21, multiply_6) + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_36, layer_norm_37, layer_norm_38 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_31, parameter_240, parameter_239, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_239, parameter_240 + + # pd_op.matmul: (-1x196x1536xf32) <- (-1x196x384xf32, 384x1536xf32) + matmul_25 = paddle._C_ops.matmul(layer_norm_36, parameter_238, False, False) + del parameter_238 + + # pd_op.add: (-1x196x1536xf32) <- (-1x196x1536xf32, 1536xf32) + add_32 = paddle._C_ops.add(matmul_25, parameter_237) + del parameter_237 + + # pd_op.gelu: (-1x196x1536xf32) <- (-1x196x1536xf32) + gelu_4 = paddle._C_ops.gelu(add_32, False) + + # pd_op.matmul: (-1x196x384xf32) <- (-1x196x1536xf32, 1536x384xf32) + matmul_26 = paddle._C_ops.matmul(gelu_4, parameter_236, False, False) + del parameter_236 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, 384xf32) + add_33 = paddle._C_ops.add(matmul_26, parameter_235) + del parameter_235 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_32 = paddle._C_ops.shape64(add_33) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_66 = paddle._C_ops.slice( + shape64_32, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_32 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_44 = [slice_66, full_39, full_39] + del slice_66 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_42 = paddle._C_ops.stack(combine_44, 0) + del combine_44 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_7 = paddle._C_ops.uniform( + stack_42, + paddle.float32, + full_40, + full_41, + 0, + paddle.framework._current_expected_place(), + ) + del stack_42 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_180 = paddle._C_ops.add(full_5, uniform_7) + del uniform_7 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_7 = paddle._C_ops.floor(add_180) + del add_180 + + # pd_op.divide: (-1x196x384xf32) <- (-1x196x384xf32, xf32) + divide_7 = paddle._C_ops.divide(add_33, full_5) + + # pd_op.multiply: (-1x196x384xf32) <- (-1x196x384xf32, -1x1x1xf32) + multiply_7 = paddle._C_ops.multiply(divide_7, floor_7) + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_34 = paddle._C_ops.add(add_31, multiply_7) + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_33 = paddle._C_ops.shape64(add_34) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_67 = paddle._C_ops.slice( + shape64_33, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_33 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_39, layer_norm_40, layer_norm_41 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_34, parameter_234, parameter_233, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_233, parameter_234 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_45 = [slice_67, full_55, full_55, full_43] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_43 = paddle._C_ops.stack(combine_45, 0) + del combine_45 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x196x384xf32, 4xi64) + reshape_47 = paddle._C_ops.reshape(layer_norm_39, stack_43) + del stack_43 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_34 = paddle._C_ops.shape64(reshape_47) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_68 = paddle._C_ops.slice( + shape64_34, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_34 + + # pd_op.roll: (-1x14x14x384xf32) <- (-1x14x14x384xf32, 2xi64) + roll_4 = paddle._C_ops.roll(reshape_47, full_int_array_2, [1, 2]) + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_35 = paddle._C_ops.shape64(roll_4) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_69 = paddle._C_ops.slice( + shape64_35, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_35 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_46 = [slice_69, full_56, full_28, full_56, full_28, full_43] + del slice_69 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_44 = paddle._C_ops.stack(combine_46, 0) + del combine_46 + + # pd_op.reshape: (-1x2x7x2x7x384xf32) <- (-1x14x14x384xf32, 6xi64) + reshape_241 = paddle._C_ops.reshape(roll_4, stack_44) + del stack_44 + + # pd_op.transpose: (-1x2x2x7x7x384xf32) <- (-1x2x7x2x7x384xf32) + transpose_31 = paddle._C_ops.transpose(reshape_241, [0, 1, 3, 2, 4, 5]) + del reshape_241 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x2x2x7x7x384xf32, 4xi64) + reshape_48 = paddle._C_ops.reshape(transpose_31, full_int_array_38) + + # pd_op.reshape: (-1x49x384xf32) <- (-1x7x7x384xf32, 3xi64) + reshape_49 = paddle._C_ops.reshape(reshape_48, full_int_array_39) + + # pd_op.full: (1x14x14x1xf32) <- () + full_59 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__28 = paddle._C_ops.set_value_( + full_59, + full_int_array_16, + full_int_array_17, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_59 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__29 = paddle._C_ops.set_value_( + set_value__28, + full_int_array_19, + full_int_array_20, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__28 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__30 = paddle._C_ops.set_value_( + set_value__29, + full_int_array_21, + full_int_array_22, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__29 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__31 = paddle._C_ops.set_value_( + set_value__30, + full_int_array_23, + full_int_array_24, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__30 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__32 = paddle._C_ops.set_value_( + set_value__31, + full_int_array_17, + full_int_array_2, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__31 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__33 = paddle._C_ops.set_value_( + set_value__32, + full_int_array_20, + full_int_array_25, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__32 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__34 = paddle._C_ops.set_value_( + set_value__33, + full_int_array_26, + full_int_array_27, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__33 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__35 = paddle._C_ops.set_value_( + set_value__34, + full_int_array_24, + full_int_array_28, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__34 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__2 = paddle._C_ops.set_value_( + set_value__35, + full_int_array_2, + full_int_array_29, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__35 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_42 = [1, 2, 7, 2, 7, 1] + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_242 = paddle._C_ops.reshape(set_value__2, full_int_array_42) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_148 = paddle._C_ops.transpose(reshape_242, [0, 1, 3, 2, 4, 5]) + del reshape_242 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_243 = paddle._C_ops.reshape(transpose_148, full_int_array_31) + del transpose_148 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_244 = paddle._C_ops.reshape(reshape_243, full_int_array_32) + del reshape_243 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_43 = paddle._C_ops.unsqueeze(reshape_244, full_int_array_8) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_44 = paddle._C_ops.unsqueeze(reshape_244, full_int_array_0) + del reshape_244 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_2 = paddle._C_ops.subtract(unsqueeze_43, unsqueeze_44) + del unsqueeze_43, unsqueeze_44 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_2 = paddle._C_ops.not_equal(subtract_2, full_34) + + # pd_op.full: (4x49x49xf32) <- () + full_60 = paddle._C_ops.full( + [4, 49, 49], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_4 = paddle._C_ops.where(not_equal_2, full_60, subtract_2) + del not_equal_2, subtract_2 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_2 = paddle._C_ops.equal(where_4, full_34) + + # pd_op.full: (4x49x49xf32) <- () + full_61 = paddle._C_ops.full( + [4, 49, 49], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_5 = paddle._C_ops.where(equal_2, full_61, where_4) + del equal_2, where_4 + + # pd_op.shape64: (3xi64) <- (-1x49x384xf32) + shape64_36 = paddle._C_ops.shape64(reshape_49) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_70 = paddle._C_ops.slice( + shape64_36, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_36 + + # pd_op.matmul: (-1x49x1152xf32) <- (-1x49x384xf32, 384x1152xf32) + matmul_27 = paddle._C_ops.matmul(reshape_49, parameter_232, False, False) + del parameter_232 + + # pd_op.add: (-1x49x1152xf32) <- (-1x49x1152xf32, 1152xf32) + add_35 = paddle._C_ops.add(matmul_27, parameter_231) + del parameter_231 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_47 = [slice_70, full_29, full_30, full_57, full_31] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_45 = paddle._C_ops.stack(combine_47, 0) + del combine_47 + + # pd_op.reshape: (-1x49x3x12x32xf32) <- (-1x49x1152xf32, 5xi64) + reshape_245 = paddle._C_ops.reshape(add_35, stack_45) + del stack_45 + + # pd_op.transpose: (3x-1x12x49x32xf32) <- (-1x49x3x12x32xf32) + transpose_32 = paddle._C_ops.transpose(reshape_245, [2, 0, 3, 1, 4]) + del reshape_245 + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_71 = paddle._C_ops.slice( + transpose_32, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_72 = paddle._C_ops.slice( + transpose_32, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_5 = paddle._C_ops.slice( + transpose_32, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x12x49x32xf32) <- (-1x12x49x32xf32, 1xf32) + scale_5 = paddle._C_ops.scale(slice_71, full_0, float("0"), True) + del slice_71 + + # pd_op.transpose: (-1x12x32x49xf32) <- (-1x12x49x32xf32) + transpose_33 = paddle._C_ops.transpose(slice_72, [0, 1, 3, 2]) + del slice_72 + + # pd_op.matmul: (-1x12x49x49xf32) <- (-1x12x49x32xf32, -1x12x32x49xf32) + matmul_28 = paddle._C_ops.matmul(scale_5, transpose_33, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_50 = paddle._C_ops.reshape(data_11, full_int_array_12) + del data_11 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_5 = paddle._C_ops.index_select(data_12, reshape_50, 0) + del data_12 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_246 = paddle._C_ops.reshape(index_select_5, full_int_array_13) + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_34 = paddle._C_ops.transpose(reshape_246, [2, 0, 1]) + del reshape_246 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_7 = paddle._C_ops.unsqueeze(transpose_34, full_int_array_7) + + # pd_op.add: (-1x12x49x49xf32) <- (-1x12x49x49xf32, 1x12x49x49xf32) + add_36 = paddle._C_ops.add(matmul_28, unsqueeze_7) + + # pd_op.full: (xi64) <- () + full_62 = paddle._C_ops.full( + [], float("4"), paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_2 = paddle._C_ops.floor_divide(slice_70, full_62) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_48 = [floor_divide_2, full_46, full_57, full_29, full_29] + del floor_divide_2 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_46 = paddle._C_ops.stack(combine_48, 0) + del combine_48 + + # pd_op.reshape: (-1x4x12x49x49xf32) <- (-1x12x49x49xf32, 5xi64) + reshape_51 = paddle._C_ops.reshape(add_36, stack_46) + del stack_46 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_45 = paddle._C_ops.unsqueeze(where_5, full_int_array_8) + del where_5 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_8 = paddle._C_ops.unsqueeze(unsqueeze_45, full_int_array_7) + del unsqueeze_45 + + # pd_op.add: (-1x4x12x49x49xf32) <- (-1x4x12x49x49xf32, 1x4x1x49x49xf32) + add_37 = paddle._C_ops.add(reshape_51, unsqueeze_8) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_49 = [slice_70, full_57, full_29, full_29] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_47 = paddle._C_ops.stack(combine_49, 0) + del combine_49 + + # pd_op.reshape: (-1x12x49x49xf32) <- (-1x4x12x49x49xf32, 4xi64) + reshape_247 = paddle._C_ops.reshape(add_37, stack_47) + del stack_47 + + # pd_op.softmax: (-1x12x49x49xf32) <- (-1x12x49x49xf32) + softmax_5 = paddle._C_ops.softmax(reshape_247, -1) + del reshape_247 + + # pd_op.matmul: (-1x12x49x32xf32) <- (-1x12x49x49xf32, -1x12x49x32xf32) + matmul_129 = paddle._C_ops.matmul(softmax_5, slice_5, False, False) + + # pd_op.transpose: (-1x49x12x32xf32) <- (-1x12x49x32xf32) + transpose_35 = paddle._C_ops.transpose(matmul_129, [0, 2, 1, 3]) + del matmul_129 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_50 = [slice_70, full_29, full_43] + del slice_70 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_48 = paddle._C_ops.stack(combine_50, 0) + del combine_50 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x49x12x32xf32, 3xi64) + reshape_52 = paddle._C_ops.reshape(transpose_35, stack_48) + del stack_48 + + # pd_op.matmul: (-1x49x384xf32) <- (-1x49x384xf32, 384x384xf32) + matmul_29 = paddle._C_ops.matmul(reshape_52, parameter_230, False, False) + del parameter_230 + + # pd_op.add: (-1x49x384xf32) <- (-1x49x384xf32, 384xf32) + add_38 = paddle._C_ops.add(matmul_29, parameter_229) + del parameter_229 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x49x384xf32, 4xi64) + reshape_53 = paddle._C_ops.reshape(add_38, full_int_array_38) + + # pd_op.reshape: (-1x2x2x7x7x384xf32) <- (-1x7x7x384xf32, 6xi64) + reshape_248 = paddle._C_ops.reshape(reshape_53, full_int_array_40) + + # pd_op.transpose: (-1x2x7x2x7x384xf32) <- (-1x2x2x7x7x384xf32) + transpose_36 = paddle._C_ops.transpose(reshape_248, [0, 1, 3, 2, 4, 5]) + del reshape_248 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x2x7x2x7x384xf32, 4xi64) + reshape_54 = paddle._C_ops.reshape(transpose_36, full_int_array_41) + + # pd_op.roll: (-1x14x14x384xf32) <- (-1x14x14x384xf32, 2xi64) + roll_5 = paddle._C_ops.roll(reshape_54, full_int_array_3, [1, 2]) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_51 = [slice_67, full_58, full_43] + del slice_67 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_49 = paddle._C_ops.stack(combine_51, 0) + del combine_51 + + # pd_op.reshape: (-1x196x384xf32) <- (-1x14x14x384xf32, 3xi64) + reshape_55 = paddle._C_ops.reshape(roll_5, stack_49) + del stack_49 + + # pd_op.full: (xf32) <- () + full_6 = paddle._C_ops.full( + [], + float("0.934783"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_75 = full_6 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_37 = paddle._C_ops.shape64(reshape_55) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_73 = paddle._C_ops.slice( + shape64_37, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_37 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_52 = [slice_73, full_39, full_39] + del slice_73 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_50 = paddle._C_ops.stack(combine_52, 0) + del combine_52 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_8 = paddle._C_ops.uniform( + stack_50, + paddle.float32, + full_40, + full_41, + 0, + paddle.framework._current_expected_place(), + ) + del stack_50 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_181 = paddle._C_ops.add(full_6, uniform_8) + del uniform_8 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_8 = paddle._C_ops.floor(add_181) + del add_181 + + # pd_op.divide: (-1x196x384xf32) <- (-1x196x384xf32, xf32) + divide_8 = paddle._C_ops.divide(reshape_55, full_6) + + # pd_op.multiply: (-1x196x384xf32) <- (-1x196x384xf32, -1x1x1xf32) + multiply_8 = paddle._C_ops.multiply(divide_8, floor_8) + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_39 = paddle._C_ops.add(add_34, multiply_8) + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_42, layer_norm_43, layer_norm_44 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_39, parameter_228, parameter_227, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_227, parameter_228 + + # pd_op.matmul: (-1x196x1536xf32) <- (-1x196x384xf32, 384x1536xf32) + matmul_30 = paddle._C_ops.matmul(layer_norm_42, parameter_226, False, False) + del parameter_226 + + # pd_op.add: (-1x196x1536xf32) <- (-1x196x1536xf32, 1536xf32) + add_40 = paddle._C_ops.add(matmul_30, parameter_225) + del parameter_225 + + # pd_op.gelu: (-1x196x1536xf32) <- (-1x196x1536xf32) + gelu_5 = paddle._C_ops.gelu(add_40, False) + + # pd_op.matmul: (-1x196x384xf32) <- (-1x196x1536xf32, 1536x384xf32) + matmul_31 = paddle._C_ops.matmul(gelu_5, parameter_224, False, False) + del parameter_224 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, 384xf32) + add_41 = paddle._C_ops.add(matmul_31, parameter_223) + del parameter_223 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_38 = paddle._C_ops.shape64(add_41) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_74 = paddle._C_ops.slice( + shape64_38, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_38 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_53 = [slice_74, full_39, full_39] + del slice_74 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_51 = paddle._C_ops.stack(combine_53, 0) + del combine_53 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_9 = paddle._C_ops.uniform( + stack_51, + paddle.float32, + full_40, + full_41, + 0, + paddle.framework._current_expected_place(), + ) + del stack_51 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_182 = paddle._C_ops.add(full_6, uniform_9) + del uniform_9 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_9 = paddle._C_ops.floor(add_182) + del add_182 + + # pd_op.divide: (-1x196x384xf32) <- (-1x196x384xf32, xf32) + divide_9 = paddle._C_ops.divide(add_41, full_6) + + # pd_op.multiply: (-1x196x384xf32) <- (-1x196x384xf32, -1x1x1xf32) + multiply_9 = paddle._C_ops.multiply(divide_9, floor_9) + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_42 = paddle._C_ops.add(add_39, multiply_9) + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_39 = paddle._C_ops.shape64(add_42) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_75 = paddle._C_ops.slice( + shape64_39, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_39 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_45, layer_norm_46, layer_norm_47 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_42, parameter_222, parameter_221, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_221, parameter_222 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_54 = [slice_75, full_55, full_55, full_43] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_52 = paddle._C_ops.stack(combine_54, 0) + del combine_54 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x196x384xf32, 4xi64) + reshape_56 = paddle._C_ops.reshape(layer_norm_45, stack_52) + del stack_52 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_40 = paddle._C_ops.shape64(reshape_56) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_76 = paddle._C_ops.slice( + shape64_40, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_40 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_55 = [slice_76, full_56, full_28, full_56, full_28, full_43] + del slice_76 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_53 = paddle._C_ops.stack(combine_55, 0) + del combine_55 + + # pd_op.reshape: (-1x2x7x2x7x384xf32) <- (-1x14x14x384xf32, 6xi64) + reshape_249 = paddle._C_ops.reshape(reshape_56, stack_53) + del stack_53 + + # pd_op.transpose: (-1x2x2x7x7x384xf32) <- (-1x2x7x2x7x384xf32) + transpose_37 = paddle._C_ops.transpose(reshape_249, [0, 1, 3, 2, 4, 5]) + del reshape_249 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x2x2x7x7x384xf32, 4xi64) + reshape_57 = paddle._C_ops.reshape(transpose_37, full_int_array_38) + + # pd_op.reshape: (-1x49x384xf32) <- (-1x7x7x384xf32, 3xi64) + reshape_58 = paddle._C_ops.reshape(reshape_57, full_int_array_39) + + # pd_op.shape64: (3xi64) <- (-1x49x384xf32) + shape64_41 = paddle._C_ops.shape64(reshape_58) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_77 = paddle._C_ops.slice( + shape64_41, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_41 + + # pd_op.matmul: (-1x49x1152xf32) <- (-1x49x384xf32, 384x1152xf32) + matmul_32 = paddle._C_ops.matmul(reshape_58, parameter_220, False, False) + del parameter_220 + + # pd_op.add: (-1x49x1152xf32) <- (-1x49x1152xf32, 1152xf32) + add_43 = paddle._C_ops.add(matmul_32, parameter_219) + del parameter_219 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_56 = [slice_77, full_29, full_30, full_57, full_31] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_54 = paddle._C_ops.stack(combine_56, 0) + del combine_56 + + # pd_op.reshape: (-1x49x3x12x32xf32) <- (-1x49x1152xf32, 5xi64) + reshape_250 = paddle._C_ops.reshape(add_43, stack_54) + del stack_54 + + # pd_op.transpose: (3x-1x12x49x32xf32) <- (-1x49x3x12x32xf32) + transpose_38 = paddle._C_ops.transpose(reshape_250, [2, 0, 3, 1, 4]) + del reshape_250 + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_78 = paddle._C_ops.slice( + transpose_38, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_79 = paddle._C_ops.slice( + transpose_38, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_6 = paddle._C_ops.slice( + transpose_38, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x12x49x32xf32) <- (-1x12x49x32xf32, 1xf32) + scale_6 = paddle._C_ops.scale(slice_78, full_0, float("0"), True) + del slice_78 + + # pd_op.transpose: (-1x12x32x49xf32) <- (-1x12x49x32xf32) + transpose_39 = paddle._C_ops.transpose(slice_79, [0, 1, 3, 2]) + del slice_79 + + # pd_op.matmul: (-1x12x49x49xf32) <- (-1x12x49x32xf32, -1x12x32x49xf32) + matmul_33 = paddle._C_ops.matmul(scale_6, transpose_39, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_59 = paddle._C_ops.reshape(data_13, full_int_array_12) + del data_13 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_6 = paddle._C_ops.index_select(data_14, reshape_59, 0) + del data_14 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_251 = paddle._C_ops.reshape(index_select_6, full_int_array_13) + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_40 = paddle._C_ops.transpose(reshape_251, [2, 0, 1]) + del reshape_251 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_9 = paddle._C_ops.unsqueeze(transpose_40, full_int_array_7) + + # pd_op.add: (-1x12x49x49xf32) <- (-1x12x49x49xf32, 1x12x49x49xf32) + add_183 = paddle._C_ops.add(matmul_33, unsqueeze_9) + + # pd_op.softmax: (-1x12x49x49xf32) <- (-1x12x49x49xf32) + softmax_6 = paddle._C_ops.softmax(add_183, -1) + del add_183 + + # pd_op.matmul: (-1x12x49x32xf32) <- (-1x12x49x49xf32, -1x12x49x32xf32) + matmul_130 = paddle._C_ops.matmul(softmax_6, slice_6, False, False) + + # pd_op.transpose: (-1x49x12x32xf32) <- (-1x12x49x32xf32) + transpose_41 = paddle._C_ops.transpose(matmul_130, [0, 2, 1, 3]) + del matmul_130 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_57 = [slice_77, full_29, full_43] + del slice_77 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_55 = paddle._C_ops.stack(combine_57, 0) + del combine_57 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x49x12x32xf32, 3xi64) + reshape_60 = paddle._C_ops.reshape(transpose_41, stack_55) + del stack_55 + + # pd_op.matmul: (-1x49x384xf32) <- (-1x49x384xf32, 384x384xf32) + matmul_34 = paddle._C_ops.matmul(reshape_60, parameter_218, False, False) + del parameter_218 + + # pd_op.add: (-1x49x384xf32) <- (-1x49x384xf32, 384xf32) + add_44 = paddle._C_ops.add(matmul_34, parameter_217) + del parameter_217 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x49x384xf32, 4xi64) + reshape_61 = paddle._C_ops.reshape(add_44, full_int_array_38) + + # pd_op.reshape: (-1x2x2x7x7x384xf32) <- (-1x7x7x384xf32, 6xi64) + reshape_252 = paddle._C_ops.reshape(reshape_61, full_int_array_40) + + # pd_op.transpose: (-1x2x7x2x7x384xf32) <- (-1x2x2x7x7x384xf32) + transpose_42 = paddle._C_ops.transpose(reshape_252, [0, 1, 3, 2, 4, 5]) + del reshape_252 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x2x7x2x7x384xf32, 4xi64) + reshape_62 = paddle._C_ops.reshape(transpose_42, full_int_array_41) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_58 = [slice_75, full_58, full_43] + del slice_75 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_56 = paddle._C_ops.stack(combine_58, 0) + del combine_58 + + # pd_op.reshape: (-1x196x384xf32) <- (-1x14x14x384xf32, 3xi64) + reshape_63 = paddle._C_ops.reshape(reshape_62, stack_56) + del stack_56 + + # pd_op.full: (xf32) <- () + full_7 = paddle._C_ops.full( + [], + float("0.921739"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_84 = full_7 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_42 = paddle._C_ops.shape64(reshape_63) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_80 = paddle._C_ops.slice( + shape64_42, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_42 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_59 = [slice_80, full_39, full_39] + del slice_80 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_57 = paddle._C_ops.stack(combine_59, 0) + del combine_59 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_10 = paddle._C_ops.uniform( + stack_57, + paddle.float32, + full_40, + full_41, + 0, + paddle.framework._current_expected_place(), + ) + del stack_57 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_184 = paddle._C_ops.add(full_7, uniform_10) + del uniform_10 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_10 = paddle._C_ops.floor(add_184) + del add_184 + + # pd_op.divide: (-1x196x384xf32) <- (-1x196x384xf32, xf32) + divide_10 = paddle._C_ops.divide(reshape_63, full_7) + + # pd_op.multiply: (-1x196x384xf32) <- (-1x196x384xf32, -1x1x1xf32) + multiply_10 = paddle._C_ops.multiply(divide_10, floor_10) + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_45 = paddle._C_ops.add(add_42, multiply_10) + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_48, layer_norm_49, layer_norm_50 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_45, parameter_216, parameter_215, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_215, parameter_216 + + # pd_op.matmul: (-1x196x1536xf32) <- (-1x196x384xf32, 384x1536xf32) + matmul_35 = paddle._C_ops.matmul(layer_norm_48, parameter_214, False, False) + del parameter_214 + + # pd_op.add: (-1x196x1536xf32) <- (-1x196x1536xf32, 1536xf32) + add_46 = paddle._C_ops.add(matmul_35, parameter_213) + del parameter_213 + + # pd_op.gelu: (-1x196x1536xf32) <- (-1x196x1536xf32) + gelu_6 = paddle._C_ops.gelu(add_46, False) + + # pd_op.matmul: (-1x196x384xf32) <- (-1x196x1536xf32, 1536x384xf32) + matmul_36 = paddle._C_ops.matmul(gelu_6, parameter_212, False, False) + del parameter_212 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, 384xf32) + add_47 = paddle._C_ops.add(matmul_36, parameter_211) + del parameter_211 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_43 = paddle._C_ops.shape64(add_47) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_81 = paddle._C_ops.slice( + shape64_43, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_43 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_60 = [slice_81, full_39, full_39] + del slice_81 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_58 = paddle._C_ops.stack(combine_60, 0) + del combine_60 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_11 = paddle._C_ops.uniform( + stack_58, + paddle.float32, + full_40, + full_41, + 0, + paddle.framework._current_expected_place(), + ) + del stack_58 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_185 = paddle._C_ops.add(full_7, uniform_11) + del uniform_11 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_11 = paddle._C_ops.floor(add_185) + del add_185 + + # pd_op.divide: (-1x196x384xf32) <- (-1x196x384xf32, xf32) + divide_11 = paddle._C_ops.divide(add_47, full_7) + + # pd_op.multiply: (-1x196x384xf32) <- (-1x196x384xf32, -1x1x1xf32) + multiply_11 = paddle._C_ops.multiply(divide_11, floor_11) + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_48 = paddle._C_ops.add(add_45, multiply_11) + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_44 = paddle._C_ops.shape64(add_48) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_82 = paddle._C_ops.slice( + shape64_44, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_44 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_51, layer_norm_52, layer_norm_53 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_48, parameter_210, parameter_209, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_209, parameter_210 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_61 = [slice_82, full_55, full_55, full_43] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_59 = paddle._C_ops.stack(combine_61, 0) + del combine_61 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x196x384xf32, 4xi64) + reshape_64 = paddle._C_ops.reshape(layer_norm_51, stack_59) + del stack_59 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_45 = paddle._C_ops.shape64(reshape_64) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_83 = paddle._C_ops.slice( + shape64_45, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_45 + + # pd_op.roll: (-1x14x14x384xf32) <- (-1x14x14x384xf32, 2xi64) + roll_6 = paddle._C_ops.roll(reshape_64, full_int_array_2, [1, 2]) + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_46 = paddle._C_ops.shape64(roll_6) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_84 = paddle._C_ops.slice( + shape64_46, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_46 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_62 = [slice_84, full_56, full_28, full_56, full_28, full_43] + del slice_84 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_60 = paddle._C_ops.stack(combine_62, 0) + del combine_62 + + # pd_op.reshape: (-1x2x7x2x7x384xf32) <- (-1x14x14x384xf32, 6xi64) + reshape_253 = paddle._C_ops.reshape(roll_6, stack_60) + del stack_60 + + # pd_op.transpose: (-1x2x2x7x7x384xf32) <- (-1x2x7x2x7x384xf32) + transpose_43 = paddle._C_ops.transpose(reshape_253, [0, 1, 3, 2, 4, 5]) + del reshape_253 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x2x2x7x7x384xf32, 4xi64) + reshape_65 = paddle._C_ops.reshape(transpose_43, full_int_array_38) + + # pd_op.reshape: (-1x49x384xf32) <- (-1x7x7x384xf32, 3xi64) + reshape_66 = paddle._C_ops.reshape(reshape_65, full_int_array_39) + + # pd_op.full: (1x14x14x1xf32) <- () + full_63 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__36 = paddle._C_ops.set_value_( + full_63, + full_int_array_16, + full_int_array_17, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_63 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__37 = paddle._C_ops.set_value_( + set_value__36, + full_int_array_19, + full_int_array_20, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__36 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__38 = paddle._C_ops.set_value_( + set_value__37, + full_int_array_21, + full_int_array_22, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__37 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__39 = paddle._C_ops.set_value_( + set_value__38, + full_int_array_23, + full_int_array_24, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__38 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__40 = paddle._C_ops.set_value_( + set_value__39, + full_int_array_17, + full_int_array_2, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__39 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__41 = paddle._C_ops.set_value_( + set_value__40, + full_int_array_20, + full_int_array_25, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__40 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__42 = paddle._C_ops.set_value_( + set_value__41, + full_int_array_26, + full_int_array_27, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__41 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__43 = paddle._C_ops.set_value_( + set_value__42, + full_int_array_24, + full_int_array_28, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__42 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__3 = paddle._C_ops.set_value_( + set_value__43, + full_int_array_2, + full_int_array_29, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__43 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_254 = paddle._C_ops.reshape(set_value__3, full_int_array_42) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_149 = paddle._C_ops.transpose(reshape_254, [0, 1, 3, 2, 4, 5]) + del reshape_254 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_255 = paddle._C_ops.reshape(transpose_149, full_int_array_31) + del transpose_149 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_256 = paddle._C_ops.reshape(reshape_255, full_int_array_32) + del reshape_255 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_46 = paddle._C_ops.unsqueeze(reshape_256, full_int_array_8) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_47 = paddle._C_ops.unsqueeze(reshape_256, full_int_array_0) + del reshape_256 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_3 = paddle._C_ops.subtract(unsqueeze_46, unsqueeze_47) + del unsqueeze_46, unsqueeze_47 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_3 = paddle._C_ops.not_equal(subtract_3, full_34) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_6 = paddle._C_ops.where(not_equal_3, full_60, subtract_3) + del not_equal_3, subtract_3 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_3 = paddle._C_ops.equal(where_6, full_34) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_7 = paddle._C_ops.where(equal_3, full_61, where_6) + del equal_3, where_6 + + # pd_op.shape64: (3xi64) <- (-1x49x384xf32) + shape64_47 = paddle._C_ops.shape64(reshape_66) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_85 = paddle._C_ops.slice( + shape64_47, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_47 + + # pd_op.matmul: (-1x49x1152xf32) <- (-1x49x384xf32, 384x1152xf32) + matmul_37 = paddle._C_ops.matmul(reshape_66, parameter_208, False, False) + del parameter_208 + + # pd_op.add: (-1x49x1152xf32) <- (-1x49x1152xf32, 1152xf32) + add_49 = paddle._C_ops.add(matmul_37, parameter_207) + del parameter_207 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_63 = [slice_85, full_29, full_30, full_57, full_31] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_61 = paddle._C_ops.stack(combine_63, 0) + del combine_63 + + # pd_op.reshape: (-1x49x3x12x32xf32) <- (-1x49x1152xf32, 5xi64) + reshape_257 = paddle._C_ops.reshape(add_49, stack_61) + del stack_61 + + # pd_op.transpose: (3x-1x12x49x32xf32) <- (-1x49x3x12x32xf32) + transpose_44 = paddle._C_ops.transpose(reshape_257, [2, 0, 3, 1, 4]) + del reshape_257 + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_86 = paddle._C_ops.slice( + transpose_44, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_87 = paddle._C_ops.slice( + transpose_44, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_7 = paddle._C_ops.slice( + transpose_44, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x12x49x32xf32) <- (-1x12x49x32xf32, 1xf32) + scale_7 = paddle._C_ops.scale(slice_86, full_0, float("0"), True) + del slice_86 + + # pd_op.transpose: (-1x12x32x49xf32) <- (-1x12x49x32xf32) + transpose_45 = paddle._C_ops.transpose(slice_87, [0, 1, 3, 2]) + del slice_87 + + # pd_op.matmul: (-1x12x49x49xf32) <- (-1x12x49x32xf32, -1x12x32x49xf32) + matmul_38 = paddle._C_ops.matmul(scale_7, transpose_45, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_67 = paddle._C_ops.reshape(data_15, full_int_array_12) + del data_15 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_7 = paddle._C_ops.index_select(data_16, reshape_67, 0) + del data_16 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_258 = paddle._C_ops.reshape(index_select_7, full_int_array_13) + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_46 = paddle._C_ops.transpose(reshape_258, [2, 0, 1]) + del reshape_258 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_10 = paddle._C_ops.unsqueeze(transpose_46, full_int_array_7) + + # pd_op.add: (-1x12x49x49xf32) <- (-1x12x49x49xf32, 1x12x49x49xf32) + add_50 = paddle._C_ops.add(matmul_38, unsqueeze_10) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_3 = paddle._C_ops.floor_divide(slice_85, full_62) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_64 = [floor_divide_3, full_46, full_57, full_29, full_29] + del floor_divide_3 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_62 = paddle._C_ops.stack(combine_64, 0) + del combine_64 + + # pd_op.reshape: (-1x4x12x49x49xf32) <- (-1x12x49x49xf32, 5xi64) + reshape_68 = paddle._C_ops.reshape(add_50, stack_62) + del stack_62 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_48 = paddle._C_ops.unsqueeze(where_7, full_int_array_8) + del where_7 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_11 = paddle._C_ops.unsqueeze(unsqueeze_48, full_int_array_7) + del unsqueeze_48 + + # pd_op.add: (-1x4x12x49x49xf32) <- (-1x4x12x49x49xf32, 1x4x1x49x49xf32) + add_51 = paddle._C_ops.add(reshape_68, unsqueeze_11) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_65 = [slice_85, full_57, full_29, full_29] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_63 = paddle._C_ops.stack(combine_65, 0) + del combine_65 + + # pd_op.reshape: (-1x12x49x49xf32) <- (-1x4x12x49x49xf32, 4xi64) + reshape_259 = paddle._C_ops.reshape(add_51, stack_63) + del stack_63 + + # pd_op.softmax: (-1x12x49x49xf32) <- (-1x12x49x49xf32) + softmax_7 = paddle._C_ops.softmax(reshape_259, -1) + del reshape_259 + + # pd_op.matmul: (-1x12x49x32xf32) <- (-1x12x49x49xf32, -1x12x49x32xf32) + matmul_131 = paddle._C_ops.matmul(softmax_7, slice_7, False, False) + + # pd_op.transpose: (-1x49x12x32xf32) <- (-1x12x49x32xf32) + transpose_47 = paddle._C_ops.transpose(matmul_131, [0, 2, 1, 3]) + del matmul_131 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_66 = [slice_85, full_29, full_43] + del slice_85 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_64 = paddle._C_ops.stack(combine_66, 0) + del combine_66 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x49x12x32xf32, 3xi64) + reshape_69 = paddle._C_ops.reshape(transpose_47, stack_64) + del stack_64 + + # pd_op.matmul: (-1x49x384xf32) <- (-1x49x384xf32, 384x384xf32) + matmul_39 = paddle._C_ops.matmul(reshape_69, parameter_206, False, False) + del parameter_206 + + # pd_op.add: (-1x49x384xf32) <- (-1x49x384xf32, 384xf32) + add_52 = paddle._C_ops.add(matmul_39, parameter_205) + del parameter_205 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x49x384xf32, 4xi64) + reshape_70 = paddle._C_ops.reshape(add_52, full_int_array_38) + + # pd_op.reshape: (-1x2x2x7x7x384xf32) <- (-1x7x7x384xf32, 6xi64) + reshape_260 = paddle._C_ops.reshape(reshape_70, full_int_array_40) + + # pd_op.transpose: (-1x2x7x2x7x384xf32) <- (-1x2x2x7x7x384xf32) + transpose_48 = paddle._C_ops.transpose(reshape_260, [0, 1, 3, 2, 4, 5]) + del reshape_260 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x2x7x2x7x384xf32, 4xi64) + reshape_71 = paddle._C_ops.reshape(transpose_48, full_int_array_41) + + # pd_op.roll: (-1x14x14x384xf32) <- (-1x14x14x384xf32, 2xi64) + roll_7 = paddle._C_ops.roll(reshape_71, full_int_array_3, [1, 2]) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_67 = [slice_82, full_58, full_43] + del slice_82 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_65 = paddle._C_ops.stack(combine_67, 0) + del combine_67 + + # pd_op.reshape: (-1x196x384xf32) <- (-1x14x14x384xf32, 3xi64) + reshape_72 = paddle._C_ops.reshape(roll_7, stack_65) + del stack_65 + + # pd_op.full: (xf32) <- () + full_8 = paddle._C_ops.full( + [], + float("0.908696"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_95 = full_8 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_48 = paddle._C_ops.shape64(reshape_72) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_88 = paddle._C_ops.slice( + shape64_48, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_48 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_68 = [slice_88, full_39, full_39] + del slice_88 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_66 = paddle._C_ops.stack(combine_68, 0) + del combine_68 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_12 = paddle._C_ops.uniform( + stack_66, + paddle.float32, + full_40, + full_41, + 0, + paddle.framework._current_expected_place(), + ) + del stack_66 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_186 = paddle._C_ops.add(full_8, uniform_12) + del uniform_12 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_12 = paddle._C_ops.floor(add_186) + del add_186 + + # pd_op.divide: (-1x196x384xf32) <- (-1x196x384xf32, xf32) + divide_12 = paddle._C_ops.divide(reshape_72, full_8) + + # pd_op.multiply: (-1x196x384xf32) <- (-1x196x384xf32, -1x1x1xf32) + multiply_12 = paddle._C_ops.multiply(divide_12, floor_12) + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_53 = paddle._C_ops.add(add_48, multiply_12) + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_54, layer_norm_55, layer_norm_56 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_53, parameter_204, parameter_203, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_203, parameter_204 + + # pd_op.matmul: (-1x196x1536xf32) <- (-1x196x384xf32, 384x1536xf32) + matmul_40 = paddle._C_ops.matmul(layer_norm_54, parameter_202, False, False) + del parameter_202 + + # pd_op.add: (-1x196x1536xf32) <- (-1x196x1536xf32, 1536xf32) + add_54 = paddle._C_ops.add(matmul_40, parameter_201) + del parameter_201 + + # pd_op.gelu: (-1x196x1536xf32) <- (-1x196x1536xf32) + gelu_7 = paddle._C_ops.gelu(add_54, False) + + # pd_op.matmul: (-1x196x384xf32) <- (-1x196x1536xf32, 1536x384xf32) + matmul_41 = paddle._C_ops.matmul(gelu_7, parameter_200, False, False) + del parameter_200 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, 384xf32) + add_55 = paddle._C_ops.add(matmul_41, parameter_199) + del parameter_199 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_49 = paddle._C_ops.shape64(add_55) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_89 = paddle._C_ops.slice( + shape64_49, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_49 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_69 = [slice_89, full_39, full_39] + del slice_89 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_67 = paddle._C_ops.stack(combine_69, 0) + del combine_69 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_13 = paddle._C_ops.uniform( + stack_67, + paddle.float32, + full_40, + full_41, + 0, + paddle.framework._current_expected_place(), + ) + del stack_67 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_187 = paddle._C_ops.add(full_8, uniform_13) + del uniform_13 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_13 = paddle._C_ops.floor(add_187) + del add_187 + + # pd_op.divide: (-1x196x384xf32) <- (-1x196x384xf32, xf32) + divide_13 = paddle._C_ops.divide(add_55, full_8) + + # pd_op.multiply: (-1x196x384xf32) <- (-1x196x384xf32, -1x1x1xf32) + multiply_13 = paddle._C_ops.multiply(divide_13, floor_13) + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_56 = paddle._C_ops.add(add_53, multiply_13) + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_50 = paddle._C_ops.shape64(add_56) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_90 = paddle._C_ops.slice( + shape64_50, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_50 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_57, layer_norm_58, layer_norm_59 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_56, parameter_198, parameter_197, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_197, parameter_198 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_70 = [slice_90, full_55, full_55, full_43] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_68 = paddle._C_ops.stack(combine_70, 0) + del combine_70 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x196x384xf32, 4xi64) + reshape_73 = paddle._C_ops.reshape(layer_norm_57, stack_68) + del stack_68 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_51 = paddle._C_ops.shape64(reshape_73) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_91 = paddle._C_ops.slice( + shape64_51, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_51 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_71 = [slice_91, full_56, full_28, full_56, full_28, full_43] + del slice_91 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_69 = paddle._C_ops.stack(combine_71, 0) + del combine_71 + + # pd_op.reshape: (-1x2x7x2x7x384xf32) <- (-1x14x14x384xf32, 6xi64) + reshape_261 = paddle._C_ops.reshape(reshape_73, stack_69) + del stack_69 + + # pd_op.transpose: (-1x2x2x7x7x384xf32) <- (-1x2x7x2x7x384xf32) + transpose_49 = paddle._C_ops.transpose(reshape_261, [0, 1, 3, 2, 4, 5]) + del reshape_261 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x2x2x7x7x384xf32, 4xi64) + reshape_74 = paddle._C_ops.reshape(transpose_49, full_int_array_38) + + # pd_op.reshape: (-1x49x384xf32) <- (-1x7x7x384xf32, 3xi64) + reshape_75 = paddle._C_ops.reshape(reshape_74, full_int_array_39) + + # pd_op.shape64: (3xi64) <- (-1x49x384xf32) + shape64_52 = paddle._C_ops.shape64(reshape_75) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_92 = paddle._C_ops.slice( + shape64_52, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_52 + + # pd_op.matmul: (-1x49x1152xf32) <- (-1x49x384xf32, 384x1152xf32) + matmul_42 = paddle._C_ops.matmul(reshape_75, parameter_196, False, False) + del parameter_196 + + # pd_op.add: (-1x49x1152xf32) <- (-1x49x1152xf32, 1152xf32) + add_57 = paddle._C_ops.add(matmul_42, parameter_195) + del parameter_195 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_72 = [slice_92, full_29, full_30, full_57, full_31] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_70 = paddle._C_ops.stack(combine_72, 0) + del combine_72 + + # pd_op.reshape: (-1x49x3x12x32xf32) <- (-1x49x1152xf32, 5xi64) + reshape_262 = paddle._C_ops.reshape(add_57, stack_70) + del stack_70 + + # pd_op.transpose: (3x-1x12x49x32xf32) <- (-1x49x3x12x32xf32) + transpose_50 = paddle._C_ops.transpose(reshape_262, [2, 0, 3, 1, 4]) + del reshape_262 + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_93 = paddle._C_ops.slice( + transpose_50, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_94 = paddle._C_ops.slice( + transpose_50, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_8 = paddle._C_ops.slice( + transpose_50, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x12x49x32xf32) <- (-1x12x49x32xf32, 1xf32) + scale_8 = paddle._C_ops.scale(slice_93, full_0, float("0"), True) + del slice_93 + + # pd_op.transpose: (-1x12x32x49xf32) <- (-1x12x49x32xf32) + transpose_51 = paddle._C_ops.transpose(slice_94, [0, 1, 3, 2]) + del slice_94 + + # pd_op.matmul: (-1x12x49x49xf32) <- (-1x12x49x32xf32, -1x12x32x49xf32) + matmul_43 = paddle._C_ops.matmul(scale_8, transpose_51, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_76 = paddle._C_ops.reshape(data_17, full_int_array_12) + del data_17 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_8 = paddle._C_ops.index_select(data_18, reshape_76, 0) + del data_18 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_263 = paddle._C_ops.reshape(index_select_8, full_int_array_13) + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_52 = paddle._C_ops.transpose(reshape_263, [2, 0, 1]) + del reshape_263 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_12 = paddle._C_ops.unsqueeze(transpose_52, full_int_array_7) + + # pd_op.add: (-1x12x49x49xf32) <- (-1x12x49x49xf32, 1x12x49x49xf32) + add_188 = paddle._C_ops.add(matmul_43, unsqueeze_12) + + # pd_op.softmax: (-1x12x49x49xf32) <- (-1x12x49x49xf32) + softmax_8 = paddle._C_ops.softmax(add_188, -1) + del add_188 + + # pd_op.matmul: (-1x12x49x32xf32) <- (-1x12x49x49xf32, -1x12x49x32xf32) + matmul_132 = paddle._C_ops.matmul(softmax_8, slice_8, False, False) + + # pd_op.transpose: (-1x49x12x32xf32) <- (-1x12x49x32xf32) + transpose_53 = paddle._C_ops.transpose(matmul_132, [0, 2, 1, 3]) + del matmul_132 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_73 = [slice_92, full_29, full_43] + del slice_92 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_71 = paddle._C_ops.stack(combine_73, 0) + del combine_73 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x49x12x32xf32, 3xi64) + reshape_77 = paddle._C_ops.reshape(transpose_53, stack_71) + del stack_71 + + # pd_op.matmul: (-1x49x384xf32) <- (-1x49x384xf32, 384x384xf32) + matmul_44 = paddle._C_ops.matmul(reshape_77, parameter_194, False, False) + del parameter_194 + + # pd_op.add: (-1x49x384xf32) <- (-1x49x384xf32, 384xf32) + add_58 = paddle._C_ops.add(matmul_44, parameter_193) + del parameter_193 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x49x384xf32, 4xi64) + reshape_78 = paddle._C_ops.reshape(add_58, full_int_array_38) + + # pd_op.reshape: (-1x2x2x7x7x384xf32) <- (-1x7x7x384xf32, 6xi64) + reshape_264 = paddle._C_ops.reshape(reshape_78, full_int_array_40) + + # pd_op.transpose: (-1x2x7x2x7x384xf32) <- (-1x2x2x7x7x384xf32) + transpose_54 = paddle._C_ops.transpose(reshape_264, [0, 1, 3, 2, 4, 5]) + del reshape_264 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x2x7x2x7x384xf32, 4xi64) + reshape_79 = paddle._C_ops.reshape(transpose_54, full_int_array_41) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_74 = [slice_90, full_58, full_43] + del slice_90 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_72 = paddle._C_ops.stack(combine_74, 0) + del combine_74 + + # pd_op.reshape: (-1x196x384xf32) <- (-1x14x14x384xf32, 3xi64) + reshape_80 = paddle._C_ops.reshape(reshape_79, stack_72) + del stack_72 + + # pd_op.full: (xf32) <- () + full_9 = paddle._C_ops.full( + [], + float("0.895652"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_104 = full_9 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_53 = paddle._C_ops.shape64(reshape_80) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_95 = paddle._C_ops.slice( + shape64_53, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_53 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_75 = [slice_95, full_39, full_39] + del slice_95 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_73 = paddle._C_ops.stack(combine_75, 0) + del combine_75 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_14 = paddle._C_ops.uniform( + stack_73, + paddle.float32, + full_40, + full_41, + 0, + paddle.framework._current_expected_place(), + ) + del stack_73 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_189 = paddle._C_ops.add(full_9, uniform_14) + del uniform_14 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_14 = paddle._C_ops.floor(add_189) + del add_189 + + # pd_op.divide: (-1x196x384xf32) <- (-1x196x384xf32, xf32) + divide_14 = paddle._C_ops.divide(reshape_80, full_9) + + # pd_op.multiply: (-1x196x384xf32) <- (-1x196x384xf32, -1x1x1xf32) + multiply_14 = paddle._C_ops.multiply(divide_14, floor_14) + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_59 = paddle._C_ops.add(add_56, multiply_14) + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_60, layer_norm_61, layer_norm_62 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_59, parameter_192, parameter_191, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_191, parameter_192 + + # pd_op.matmul: (-1x196x1536xf32) <- (-1x196x384xf32, 384x1536xf32) + matmul_45 = paddle._C_ops.matmul(layer_norm_60, parameter_190, False, False) + del parameter_190 + + # pd_op.add: (-1x196x1536xf32) <- (-1x196x1536xf32, 1536xf32) + add_60 = paddle._C_ops.add(matmul_45, parameter_189) + del parameter_189 + + # pd_op.gelu: (-1x196x1536xf32) <- (-1x196x1536xf32) + gelu_8 = paddle._C_ops.gelu(add_60, False) + + # pd_op.matmul: (-1x196x384xf32) <- (-1x196x1536xf32, 1536x384xf32) + matmul_46 = paddle._C_ops.matmul(gelu_8, parameter_188, False, False) + del parameter_188 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, 384xf32) + add_61 = paddle._C_ops.add(matmul_46, parameter_187) + del parameter_187 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_54 = paddle._C_ops.shape64(add_61) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_96 = paddle._C_ops.slice( + shape64_54, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_54 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_76 = [slice_96, full_39, full_39] + del slice_96 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_74 = paddle._C_ops.stack(combine_76, 0) + del combine_76 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_15 = paddle._C_ops.uniform( + stack_74, + paddle.float32, + full_40, + full_41, + 0, + paddle.framework._current_expected_place(), + ) + del stack_74 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_190 = paddle._C_ops.add(full_9, uniform_15) + del uniform_15 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_15 = paddle._C_ops.floor(add_190) + del add_190 + + # pd_op.divide: (-1x196x384xf32) <- (-1x196x384xf32, xf32) + divide_15 = paddle._C_ops.divide(add_61, full_9) + + # pd_op.multiply: (-1x196x384xf32) <- (-1x196x384xf32, -1x1x1xf32) + multiply_15 = paddle._C_ops.multiply(divide_15, floor_15) + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_62 = paddle._C_ops.add(add_59, multiply_15) + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_55 = paddle._C_ops.shape64(add_62) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_97 = paddle._C_ops.slice( + shape64_55, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_55 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_63, layer_norm_64, layer_norm_65 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_62, parameter_186, parameter_185, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_185, parameter_186 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_77 = [slice_97, full_55, full_55, full_43] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_75 = paddle._C_ops.stack(combine_77, 0) + del combine_77 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x196x384xf32, 4xi64) + reshape_81 = paddle._C_ops.reshape(layer_norm_63, stack_75) + del stack_75 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_56 = paddle._C_ops.shape64(reshape_81) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_98 = paddle._C_ops.slice( + shape64_56, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_56 + + # pd_op.roll: (-1x14x14x384xf32) <- (-1x14x14x384xf32, 2xi64) + roll_8 = paddle._C_ops.roll(reshape_81, full_int_array_2, [1, 2]) + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_57 = paddle._C_ops.shape64(roll_8) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_99 = paddle._C_ops.slice( + shape64_57, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_57 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_78 = [slice_99, full_56, full_28, full_56, full_28, full_43] + del slice_99 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_76 = paddle._C_ops.stack(combine_78, 0) + del combine_78 + + # pd_op.reshape: (-1x2x7x2x7x384xf32) <- (-1x14x14x384xf32, 6xi64) + reshape_265 = paddle._C_ops.reshape(roll_8, stack_76) + del stack_76 + + # pd_op.transpose: (-1x2x2x7x7x384xf32) <- (-1x2x7x2x7x384xf32) + transpose_55 = paddle._C_ops.transpose(reshape_265, [0, 1, 3, 2, 4, 5]) + del reshape_265 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x2x2x7x7x384xf32, 4xi64) + reshape_82 = paddle._C_ops.reshape(transpose_55, full_int_array_38) + + # pd_op.reshape: (-1x49x384xf32) <- (-1x7x7x384xf32, 3xi64) + reshape_83 = paddle._C_ops.reshape(reshape_82, full_int_array_39) + + # pd_op.full: (1x14x14x1xf32) <- () + full_64 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__44 = paddle._C_ops.set_value_( + full_64, + full_int_array_16, + full_int_array_17, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_64 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__45 = paddle._C_ops.set_value_( + set_value__44, + full_int_array_19, + full_int_array_20, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__44 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__46 = paddle._C_ops.set_value_( + set_value__45, + full_int_array_21, + full_int_array_22, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__45 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__47 = paddle._C_ops.set_value_( + set_value__46, + full_int_array_23, + full_int_array_24, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__46 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__48 = paddle._C_ops.set_value_( + set_value__47, + full_int_array_17, + full_int_array_2, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__47 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__49 = paddle._C_ops.set_value_( + set_value__48, + full_int_array_20, + full_int_array_25, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__48 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__50 = paddle._C_ops.set_value_( + set_value__49, + full_int_array_26, + full_int_array_27, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__49 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__51 = paddle._C_ops.set_value_( + set_value__50, + full_int_array_24, + full_int_array_28, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__50 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__4 = paddle._C_ops.set_value_( + set_value__51, + full_int_array_2, + full_int_array_29, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__51 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_266 = paddle._C_ops.reshape(set_value__4, full_int_array_42) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_150 = paddle._C_ops.transpose(reshape_266, [0, 1, 3, 2, 4, 5]) + del reshape_266 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_267 = paddle._C_ops.reshape(transpose_150, full_int_array_31) + del transpose_150 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_268 = paddle._C_ops.reshape(reshape_267, full_int_array_32) + del reshape_267 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_49 = paddle._C_ops.unsqueeze(reshape_268, full_int_array_8) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_50 = paddle._C_ops.unsqueeze(reshape_268, full_int_array_0) + del reshape_268 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_4 = paddle._C_ops.subtract(unsqueeze_49, unsqueeze_50) + del unsqueeze_49, unsqueeze_50 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_4 = paddle._C_ops.not_equal(subtract_4, full_34) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_8 = paddle._C_ops.where(not_equal_4, full_60, subtract_4) + del not_equal_4, subtract_4 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_4 = paddle._C_ops.equal(where_8, full_34) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_9 = paddle._C_ops.where(equal_4, full_61, where_8) + del equal_4, where_8 + + # pd_op.shape64: (3xi64) <- (-1x49x384xf32) + shape64_58 = paddle._C_ops.shape64(reshape_83) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_100 = paddle._C_ops.slice( + shape64_58, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_58 + + # pd_op.matmul: (-1x49x1152xf32) <- (-1x49x384xf32, 384x1152xf32) + matmul_47 = paddle._C_ops.matmul(reshape_83, parameter_184, False, False) + del parameter_184 + + # pd_op.add: (-1x49x1152xf32) <- (-1x49x1152xf32, 1152xf32) + add_63 = paddle._C_ops.add(matmul_47, parameter_183) + del parameter_183 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_79 = [slice_100, full_29, full_30, full_57, full_31] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_77 = paddle._C_ops.stack(combine_79, 0) + del combine_79 + + # pd_op.reshape: (-1x49x3x12x32xf32) <- (-1x49x1152xf32, 5xi64) + reshape_269 = paddle._C_ops.reshape(add_63, stack_77) + del stack_77 + + # pd_op.transpose: (3x-1x12x49x32xf32) <- (-1x49x3x12x32xf32) + transpose_56 = paddle._C_ops.transpose(reshape_269, [2, 0, 3, 1, 4]) + del reshape_269 + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_101 = paddle._C_ops.slice( + transpose_56, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_102 = paddle._C_ops.slice( + transpose_56, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_9 = paddle._C_ops.slice( + transpose_56, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x12x49x32xf32) <- (-1x12x49x32xf32, 1xf32) + scale_9 = paddle._C_ops.scale(slice_101, full_0, float("0"), True) + del slice_101 + + # pd_op.transpose: (-1x12x32x49xf32) <- (-1x12x49x32xf32) + transpose_57 = paddle._C_ops.transpose(slice_102, [0, 1, 3, 2]) + del slice_102 + + # pd_op.matmul: (-1x12x49x49xf32) <- (-1x12x49x32xf32, -1x12x32x49xf32) + matmul_48 = paddle._C_ops.matmul(scale_9, transpose_57, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_84 = paddle._C_ops.reshape(data_19, full_int_array_12) + del data_19 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_9 = paddle._C_ops.index_select(data_20, reshape_84, 0) + del data_20 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_270 = paddle._C_ops.reshape(index_select_9, full_int_array_13) + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_58 = paddle._C_ops.transpose(reshape_270, [2, 0, 1]) + del reshape_270 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_13 = paddle._C_ops.unsqueeze(transpose_58, full_int_array_7) + + # pd_op.add: (-1x12x49x49xf32) <- (-1x12x49x49xf32, 1x12x49x49xf32) + add_64 = paddle._C_ops.add(matmul_48, unsqueeze_13) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_4 = paddle._C_ops.floor_divide(slice_100, full_62) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_80 = [floor_divide_4, full_46, full_57, full_29, full_29] + del floor_divide_4 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_78 = paddle._C_ops.stack(combine_80, 0) + del combine_80 + + # pd_op.reshape: (-1x4x12x49x49xf32) <- (-1x12x49x49xf32, 5xi64) + reshape_85 = paddle._C_ops.reshape(add_64, stack_78) + del stack_78 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_51 = paddle._C_ops.unsqueeze(where_9, full_int_array_8) + del where_9 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_14 = paddle._C_ops.unsqueeze(unsqueeze_51, full_int_array_7) + del unsqueeze_51 + + # pd_op.add: (-1x4x12x49x49xf32) <- (-1x4x12x49x49xf32, 1x4x1x49x49xf32) + add_65 = paddle._C_ops.add(reshape_85, unsqueeze_14) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_81 = [slice_100, full_57, full_29, full_29] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_79 = paddle._C_ops.stack(combine_81, 0) + del combine_81 + + # pd_op.reshape: (-1x12x49x49xf32) <- (-1x4x12x49x49xf32, 4xi64) + reshape_271 = paddle._C_ops.reshape(add_65, stack_79) + del stack_79 + + # pd_op.softmax: (-1x12x49x49xf32) <- (-1x12x49x49xf32) + softmax_9 = paddle._C_ops.softmax(reshape_271, -1) + del reshape_271 + + # pd_op.matmul: (-1x12x49x32xf32) <- (-1x12x49x49xf32, -1x12x49x32xf32) + matmul_133 = paddle._C_ops.matmul(softmax_9, slice_9, False, False) + + # pd_op.transpose: (-1x49x12x32xf32) <- (-1x12x49x32xf32) + transpose_59 = paddle._C_ops.transpose(matmul_133, [0, 2, 1, 3]) + del matmul_133 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_82 = [slice_100, full_29, full_43] + del slice_100 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_80 = paddle._C_ops.stack(combine_82, 0) + del combine_82 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x49x12x32xf32, 3xi64) + reshape_86 = paddle._C_ops.reshape(transpose_59, stack_80) + del stack_80 + + # pd_op.matmul: (-1x49x384xf32) <- (-1x49x384xf32, 384x384xf32) + matmul_49 = paddle._C_ops.matmul(reshape_86, parameter_182, False, False) + del parameter_182 + + # pd_op.add: (-1x49x384xf32) <- (-1x49x384xf32, 384xf32) + add_66 = paddle._C_ops.add(matmul_49, parameter_181) + del parameter_181 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x49x384xf32, 4xi64) + reshape_87 = paddle._C_ops.reshape(add_66, full_int_array_38) + + # pd_op.reshape: (-1x2x2x7x7x384xf32) <- (-1x7x7x384xf32, 6xi64) + reshape_272 = paddle._C_ops.reshape(reshape_87, full_int_array_40) + + # pd_op.transpose: (-1x2x7x2x7x384xf32) <- (-1x2x2x7x7x384xf32) + transpose_60 = paddle._C_ops.transpose(reshape_272, [0, 1, 3, 2, 4, 5]) + del reshape_272 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x2x7x2x7x384xf32, 4xi64) + reshape_88 = paddle._C_ops.reshape(transpose_60, full_int_array_41) + + # pd_op.roll: (-1x14x14x384xf32) <- (-1x14x14x384xf32, 2xi64) + roll_9 = paddle._C_ops.roll(reshape_88, full_int_array_3, [1, 2]) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_83 = [slice_97, full_58, full_43] + del slice_97 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_81 = paddle._C_ops.stack(combine_83, 0) + del combine_83 + + # pd_op.reshape: (-1x196x384xf32) <- (-1x14x14x384xf32, 3xi64) + reshape_89 = paddle._C_ops.reshape(roll_9, stack_81) + del stack_81 + + # pd_op.full: (xf32) <- () + full_10 = paddle._C_ops.full( + [], + float("0.882609"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_115 = full_10 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_59 = paddle._C_ops.shape64(reshape_89) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_103 = paddle._C_ops.slice( + shape64_59, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_59 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_84 = [slice_103, full_39, full_39] + del slice_103 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_82 = paddle._C_ops.stack(combine_84, 0) + del combine_84 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_16 = paddle._C_ops.uniform( + stack_82, + paddle.float32, + full_40, + full_41, + 0, + paddle.framework._current_expected_place(), + ) + del stack_82 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_191 = paddle._C_ops.add(full_10, uniform_16) + del uniform_16 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_16 = paddle._C_ops.floor(add_191) + del add_191 + + # pd_op.divide: (-1x196x384xf32) <- (-1x196x384xf32, xf32) + divide_16 = paddle._C_ops.divide(reshape_89, full_10) + + # pd_op.multiply: (-1x196x384xf32) <- (-1x196x384xf32, -1x1x1xf32) + multiply_16 = paddle._C_ops.multiply(divide_16, floor_16) + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_67 = paddle._C_ops.add(add_62, multiply_16) + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_66, layer_norm_67, layer_norm_68 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_67, parameter_180, parameter_179, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_179, parameter_180 + + # pd_op.matmul: (-1x196x1536xf32) <- (-1x196x384xf32, 384x1536xf32) + matmul_50 = paddle._C_ops.matmul(layer_norm_66, parameter_178, False, False) + del parameter_178 + + # pd_op.add: (-1x196x1536xf32) <- (-1x196x1536xf32, 1536xf32) + add_68 = paddle._C_ops.add(matmul_50, parameter_177) + del parameter_177 + + # pd_op.gelu: (-1x196x1536xf32) <- (-1x196x1536xf32) + gelu_9 = paddle._C_ops.gelu(add_68, False) + + # pd_op.matmul: (-1x196x384xf32) <- (-1x196x1536xf32, 1536x384xf32) + matmul_51 = paddle._C_ops.matmul(gelu_9, parameter_176, False, False) + del parameter_176 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, 384xf32) + add_69 = paddle._C_ops.add(matmul_51, parameter_175) + del parameter_175 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_60 = paddle._C_ops.shape64(add_69) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_104 = paddle._C_ops.slice( + shape64_60, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_60 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_85 = [slice_104, full_39, full_39] + del slice_104 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_83 = paddle._C_ops.stack(combine_85, 0) + del combine_85 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_17 = paddle._C_ops.uniform( + stack_83, + paddle.float32, + full_40, + full_41, + 0, + paddle.framework._current_expected_place(), + ) + del stack_83 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_192 = paddle._C_ops.add(full_10, uniform_17) + del uniform_17 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_17 = paddle._C_ops.floor(add_192) + del add_192 + + # pd_op.divide: (-1x196x384xf32) <- (-1x196x384xf32, xf32) + divide_17 = paddle._C_ops.divide(add_69, full_10) + + # pd_op.multiply: (-1x196x384xf32) <- (-1x196x384xf32, -1x1x1xf32) + multiply_17 = paddle._C_ops.multiply(divide_17, floor_17) + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_70 = paddle._C_ops.add(add_67, multiply_17) + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_61 = paddle._C_ops.shape64(add_70) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_105 = paddle._C_ops.slice( + shape64_61, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_61 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_69, layer_norm_70, layer_norm_71 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_70, parameter_174, parameter_173, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_173, parameter_174 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_86 = [slice_105, full_55, full_55, full_43] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_84 = paddle._C_ops.stack(combine_86, 0) + del combine_86 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x196x384xf32, 4xi64) + reshape_90 = paddle._C_ops.reshape(layer_norm_69, stack_84) + del stack_84 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_62 = paddle._C_ops.shape64(reshape_90) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_106 = paddle._C_ops.slice( + shape64_62, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_62 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_87 = [slice_106, full_56, full_28, full_56, full_28, full_43] + del slice_106 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_85 = paddle._C_ops.stack(combine_87, 0) + del combine_87 + + # pd_op.reshape: (-1x2x7x2x7x384xf32) <- (-1x14x14x384xf32, 6xi64) + reshape_273 = paddle._C_ops.reshape(reshape_90, stack_85) + del stack_85 + + # pd_op.transpose: (-1x2x2x7x7x384xf32) <- (-1x2x7x2x7x384xf32) + transpose_61 = paddle._C_ops.transpose(reshape_273, [0, 1, 3, 2, 4, 5]) + del reshape_273 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x2x2x7x7x384xf32, 4xi64) + reshape_91 = paddle._C_ops.reshape(transpose_61, full_int_array_38) + + # pd_op.reshape: (-1x49x384xf32) <- (-1x7x7x384xf32, 3xi64) + reshape_92 = paddle._C_ops.reshape(reshape_91, full_int_array_39) + + # pd_op.shape64: (3xi64) <- (-1x49x384xf32) + shape64_63 = paddle._C_ops.shape64(reshape_92) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_107 = paddle._C_ops.slice( + shape64_63, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_63 + + # pd_op.matmul: (-1x49x1152xf32) <- (-1x49x384xf32, 384x1152xf32) + matmul_52 = paddle._C_ops.matmul(reshape_92, parameter_172, False, False) + del parameter_172 + + # pd_op.add: (-1x49x1152xf32) <- (-1x49x1152xf32, 1152xf32) + add_71 = paddle._C_ops.add(matmul_52, parameter_171) + del parameter_171 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_88 = [slice_107, full_29, full_30, full_57, full_31] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_86 = paddle._C_ops.stack(combine_88, 0) + del combine_88 + + # pd_op.reshape: (-1x49x3x12x32xf32) <- (-1x49x1152xf32, 5xi64) + reshape_274 = paddle._C_ops.reshape(add_71, stack_86) + del stack_86 + + # pd_op.transpose: (3x-1x12x49x32xf32) <- (-1x49x3x12x32xf32) + transpose_62 = paddle._C_ops.transpose(reshape_274, [2, 0, 3, 1, 4]) + del reshape_274 + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_108 = paddle._C_ops.slice( + transpose_62, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_109 = paddle._C_ops.slice( + transpose_62, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_10 = paddle._C_ops.slice( + transpose_62, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x12x49x32xf32) <- (-1x12x49x32xf32, 1xf32) + scale_10 = paddle._C_ops.scale(slice_108, full_0, float("0"), True) + del slice_108 + + # pd_op.transpose: (-1x12x32x49xf32) <- (-1x12x49x32xf32) + transpose_63 = paddle._C_ops.transpose(slice_109, [0, 1, 3, 2]) + del slice_109 + + # pd_op.matmul: (-1x12x49x49xf32) <- (-1x12x49x32xf32, -1x12x32x49xf32) + matmul_53 = paddle._C_ops.matmul(scale_10, transpose_63, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_93 = paddle._C_ops.reshape(data_21, full_int_array_12) + del data_21 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_10 = paddle._C_ops.index_select(data_22, reshape_93, 0) + del data_22 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_275 = paddle._C_ops.reshape(index_select_10, full_int_array_13) + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_64 = paddle._C_ops.transpose(reshape_275, [2, 0, 1]) + del reshape_275 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_15 = paddle._C_ops.unsqueeze(transpose_64, full_int_array_7) + + # pd_op.add: (-1x12x49x49xf32) <- (-1x12x49x49xf32, 1x12x49x49xf32) + add_193 = paddle._C_ops.add(matmul_53, unsqueeze_15) + + # pd_op.softmax: (-1x12x49x49xf32) <- (-1x12x49x49xf32) + softmax_10 = paddle._C_ops.softmax(add_193, -1) + del add_193 + + # pd_op.matmul: (-1x12x49x32xf32) <- (-1x12x49x49xf32, -1x12x49x32xf32) + matmul_134 = paddle._C_ops.matmul(softmax_10, slice_10, False, False) + + # pd_op.transpose: (-1x49x12x32xf32) <- (-1x12x49x32xf32) + transpose_65 = paddle._C_ops.transpose(matmul_134, [0, 2, 1, 3]) + del matmul_134 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_89 = [slice_107, full_29, full_43] + del slice_107 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_87 = paddle._C_ops.stack(combine_89, 0) + del combine_89 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x49x12x32xf32, 3xi64) + reshape_94 = paddle._C_ops.reshape(transpose_65, stack_87) + del stack_87 + + # pd_op.matmul: (-1x49x384xf32) <- (-1x49x384xf32, 384x384xf32) + matmul_54 = paddle._C_ops.matmul(reshape_94, parameter_170, False, False) + del parameter_170 + + # pd_op.add: (-1x49x384xf32) <- (-1x49x384xf32, 384xf32) + add_72 = paddle._C_ops.add(matmul_54, parameter_169) + del parameter_169 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x49x384xf32, 4xi64) + reshape_95 = paddle._C_ops.reshape(add_72, full_int_array_38) + + # pd_op.reshape: (-1x2x2x7x7x384xf32) <- (-1x7x7x384xf32, 6xi64) + reshape_276 = paddle._C_ops.reshape(reshape_95, full_int_array_40) + + # pd_op.transpose: (-1x2x7x2x7x384xf32) <- (-1x2x2x7x7x384xf32) + transpose_66 = paddle._C_ops.transpose(reshape_276, [0, 1, 3, 2, 4, 5]) + del reshape_276 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x2x7x2x7x384xf32, 4xi64) + reshape_96 = paddle._C_ops.reshape(transpose_66, full_int_array_41) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_90 = [slice_105, full_58, full_43] + del slice_105 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_88 = paddle._C_ops.stack(combine_90, 0) + del combine_90 + + # pd_op.reshape: (-1x196x384xf32) <- (-1x14x14x384xf32, 3xi64) + reshape_97 = paddle._C_ops.reshape(reshape_96, stack_88) + del stack_88 + + # pd_op.full: (xf32) <- () + full_11 = paddle._C_ops.full( + [], + float("0.869565"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_124 = full_11 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_64 = paddle._C_ops.shape64(reshape_97) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_110 = paddle._C_ops.slice( + shape64_64, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_64 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_91 = [slice_110, full_39, full_39] + del slice_110 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_89 = paddle._C_ops.stack(combine_91, 0) + del combine_91 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_18 = paddle._C_ops.uniform( + stack_89, + paddle.float32, + full_40, + full_41, + 0, + paddle.framework._current_expected_place(), + ) + del stack_89 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_194 = paddle._C_ops.add(full_11, uniform_18) + del uniform_18 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_18 = paddle._C_ops.floor(add_194) + del add_194 + + # pd_op.divide: (-1x196x384xf32) <- (-1x196x384xf32, xf32) + divide_18 = paddle._C_ops.divide(reshape_97, full_11) + + # pd_op.multiply: (-1x196x384xf32) <- (-1x196x384xf32, -1x1x1xf32) + multiply_18 = paddle._C_ops.multiply(divide_18, floor_18) + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_73 = paddle._C_ops.add(add_70, multiply_18) + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_72, layer_norm_73, layer_norm_74 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_73, parameter_168, parameter_167, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_167, parameter_168 + + # pd_op.matmul: (-1x196x1536xf32) <- (-1x196x384xf32, 384x1536xf32) + matmul_55 = paddle._C_ops.matmul(layer_norm_72, parameter_166, False, False) + del parameter_166 + + # pd_op.add: (-1x196x1536xf32) <- (-1x196x1536xf32, 1536xf32) + add_74 = paddle._C_ops.add(matmul_55, parameter_165) + del parameter_165 + + # pd_op.gelu: (-1x196x1536xf32) <- (-1x196x1536xf32) + gelu_10 = paddle._C_ops.gelu(add_74, False) + + # pd_op.matmul: (-1x196x384xf32) <- (-1x196x1536xf32, 1536x384xf32) + matmul_56 = paddle._C_ops.matmul(gelu_10, parameter_164, False, False) + del parameter_164 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, 384xf32) + add_75 = paddle._C_ops.add(matmul_56, parameter_163) + del parameter_163 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_65 = paddle._C_ops.shape64(add_75) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_111 = paddle._C_ops.slice( + shape64_65, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_65 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_92 = [slice_111, full_39, full_39] + del slice_111 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_90 = paddle._C_ops.stack(combine_92, 0) + del combine_92 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_19 = paddle._C_ops.uniform( + stack_90, + paddle.float32, + full_40, + full_41, + 0, + paddle.framework._current_expected_place(), + ) + del stack_90 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_195 = paddle._C_ops.add(full_11, uniform_19) + del uniform_19 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_19 = paddle._C_ops.floor(add_195) + del add_195 + + # pd_op.divide: (-1x196x384xf32) <- (-1x196x384xf32, xf32) + divide_19 = paddle._C_ops.divide(add_75, full_11) + + # pd_op.multiply: (-1x196x384xf32) <- (-1x196x384xf32, -1x1x1xf32) + multiply_19 = paddle._C_ops.multiply(divide_19, floor_19) + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_76 = paddle._C_ops.add(add_73, multiply_19) + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_66 = paddle._C_ops.shape64(add_76) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_112 = paddle._C_ops.slice( + shape64_66, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_66 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_75, layer_norm_76, layer_norm_77 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_76, parameter_162, parameter_161, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_161, parameter_162 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_93 = [slice_112, full_55, full_55, full_43] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_91 = paddle._C_ops.stack(combine_93, 0) + del combine_93 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x196x384xf32, 4xi64) + reshape_98 = paddle._C_ops.reshape(layer_norm_75, stack_91) + del stack_91 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_67 = paddle._C_ops.shape64(reshape_98) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_113 = paddle._C_ops.slice( + shape64_67, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_67 + + # pd_op.roll: (-1x14x14x384xf32) <- (-1x14x14x384xf32, 2xi64) + roll_10 = paddle._C_ops.roll(reshape_98, full_int_array_2, [1, 2]) + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_68 = paddle._C_ops.shape64(roll_10) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_114 = paddle._C_ops.slice( + shape64_68, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_68 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_94 = [slice_114, full_56, full_28, full_56, full_28, full_43] + del slice_114 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_92 = paddle._C_ops.stack(combine_94, 0) + del combine_94 + + # pd_op.reshape: (-1x2x7x2x7x384xf32) <- (-1x14x14x384xf32, 6xi64) + reshape_277 = paddle._C_ops.reshape(roll_10, stack_92) + del stack_92 + + # pd_op.transpose: (-1x2x2x7x7x384xf32) <- (-1x2x7x2x7x384xf32) + transpose_67 = paddle._C_ops.transpose(reshape_277, [0, 1, 3, 2, 4, 5]) + del reshape_277 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x2x2x7x7x384xf32, 4xi64) + reshape_99 = paddle._C_ops.reshape(transpose_67, full_int_array_38) + + # pd_op.reshape: (-1x49x384xf32) <- (-1x7x7x384xf32, 3xi64) + reshape_100 = paddle._C_ops.reshape(reshape_99, full_int_array_39) + + # pd_op.full: (1x14x14x1xf32) <- () + full_65 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__52 = paddle._C_ops.set_value_( + full_65, + full_int_array_16, + full_int_array_17, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_65 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__53 = paddle._C_ops.set_value_( + set_value__52, + full_int_array_19, + full_int_array_20, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__52 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__54 = paddle._C_ops.set_value_( + set_value__53, + full_int_array_21, + full_int_array_22, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__53 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__55 = paddle._C_ops.set_value_( + set_value__54, + full_int_array_23, + full_int_array_24, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__54 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__56 = paddle._C_ops.set_value_( + set_value__55, + full_int_array_17, + full_int_array_2, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__55 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__57 = paddle._C_ops.set_value_( + set_value__56, + full_int_array_20, + full_int_array_25, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__56 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__58 = paddle._C_ops.set_value_( + set_value__57, + full_int_array_26, + full_int_array_27, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__57 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__59 = paddle._C_ops.set_value_( + set_value__58, + full_int_array_24, + full_int_array_28, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__58 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__5 = paddle._C_ops.set_value_( + set_value__59, + full_int_array_2, + full_int_array_29, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__59 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_278 = paddle._C_ops.reshape(set_value__5, full_int_array_42) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_151 = paddle._C_ops.transpose(reshape_278, [0, 1, 3, 2, 4, 5]) + del reshape_278 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_279 = paddle._C_ops.reshape(transpose_151, full_int_array_31) + del transpose_151 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_280 = paddle._C_ops.reshape(reshape_279, full_int_array_32) + del reshape_279 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_52 = paddle._C_ops.unsqueeze(reshape_280, full_int_array_8) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_53 = paddle._C_ops.unsqueeze(reshape_280, full_int_array_0) + del reshape_280 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_5 = paddle._C_ops.subtract(unsqueeze_52, unsqueeze_53) + del unsqueeze_52, unsqueeze_53 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_5 = paddle._C_ops.not_equal(subtract_5, full_34) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_10 = paddle._C_ops.where(not_equal_5, full_60, subtract_5) + del not_equal_5, subtract_5 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_5 = paddle._C_ops.equal(where_10, full_34) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_11 = paddle._C_ops.where(equal_5, full_61, where_10) + del equal_5, where_10 + + # pd_op.shape64: (3xi64) <- (-1x49x384xf32) + shape64_69 = paddle._C_ops.shape64(reshape_100) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_115 = paddle._C_ops.slice( + shape64_69, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_69 + + # pd_op.matmul: (-1x49x1152xf32) <- (-1x49x384xf32, 384x1152xf32) + matmul_57 = paddle._C_ops.matmul(reshape_100, parameter_160, False, False) + del parameter_160 + + # pd_op.add: (-1x49x1152xf32) <- (-1x49x1152xf32, 1152xf32) + add_77 = paddle._C_ops.add(matmul_57, parameter_159) + del parameter_159 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_95 = [slice_115, full_29, full_30, full_57, full_31] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_93 = paddle._C_ops.stack(combine_95, 0) + del combine_95 + + # pd_op.reshape: (-1x49x3x12x32xf32) <- (-1x49x1152xf32, 5xi64) + reshape_281 = paddle._C_ops.reshape(add_77, stack_93) + del stack_93 + + # pd_op.transpose: (3x-1x12x49x32xf32) <- (-1x49x3x12x32xf32) + transpose_68 = paddle._C_ops.transpose(reshape_281, [2, 0, 3, 1, 4]) + del reshape_281 + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_116 = paddle._C_ops.slice( + transpose_68, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_117 = paddle._C_ops.slice( + transpose_68, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_11 = paddle._C_ops.slice( + transpose_68, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x12x49x32xf32) <- (-1x12x49x32xf32, 1xf32) + scale_11 = paddle._C_ops.scale(slice_116, full_0, float("0"), True) + del slice_116 + + # pd_op.transpose: (-1x12x32x49xf32) <- (-1x12x49x32xf32) + transpose_69 = paddle._C_ops.transpose(slice_117, [0, 1, 3, 2]) + del slice_117 + + # pd_op.matmul: (-1x12x49x49xf32) <- (-1x12x49x32xf32, -1x12x32x49xf32) + matmul_58 = paddle._C_ops.matmul(scale_11, transpose_69, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_101 = paddle._C_ops.reshape(data_23, full_int_array_12) + del data_23 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_11 = paddle._C_ops.index_select(data_24, reshape_101, 0) + del data_24 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_282 = paddle._C_ops.reshape(index_select_11, full_int_array_13) + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_70 = paddle._C_ops.transpose(reshape_282, [2, 0, 1]) + del reshape_282 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_16 = paddle._C_ops.unsqueeze(transpose_70, full_int_array_7) + + # pd_op.add: (-1x12x49x49xf32) <- (-1x12x49x49xf32, 1x12x49x49xf32) + add_78 = paddle._C_ops.add(matmul_58, unsqueeze_16) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_5 = paddle._C_ops.floor_divide(slice_115, full_62) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_96 = [floor_divide_5, full_46, full_57, full_29, full_29] + del floor_divide_5 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_94 = paddle._C_ops.stack(combine_96, 0) + del combine_96 + + # pd_op.reshape: (-1x4x12x49x49xf32) <- (-1x12x49x49xf32, 5xi64) + reshape_102 = paddle._C_ops.reshape(add_78, stack_94) + del stack_94 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_54 = paddle._C_ops.unsqueeze(where_11, full_int_array_8) + del where_11 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_17 = paddle._C_ops.unsqueeze(unsqueeze_54, full_int_array_7) + del unsqueeze_54 + + # pd_op.add: (-1x4x12x49x49xf32) <- (-1x4x12x49x49xf32, 1x4x1x49x49xf32) + add_79 = paddle._C_ops.add(reshape_102, unsqueeze_17) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_97 = [slice_115, full_57, full_29, full_29] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_95 = paddle._C_ops.stack(combine_97, 0) + del combine_97 + + # pd_op.reshape: (-1x12x49x49xf32) <- (-1x4x12x49x49xf32, 4xi64) + reshape_283 = paddle._C_ops.reshape(add_79, stack_95) + del stack_95 + + # pd_op.softmax: (-1x12x49x49xf32) <- (-1x12x49x49xf32) + softmax_11 = paddle._C_ops.softmax(reshape_283, -1) + del reshape_283 + + # pd_op.matmul: (-1x12x49x32xf32) <- (-1x12x49x49xf32, -1x12x49x32xf32) + matmul_135 = paddle._C_ops.matmul(softmax_11, slice_11, False, False) + + # pd_op.transpose: (-1x49x12x32xf32) <- (-1x12x49x32xf32) + transpose_71 = paddle._C_ops.transpose(matmul_135, [0, 2, 1, 3]) + del matmul_135 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_98 = [slice_115, full_29, full_43] + del slice_115 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_96 = paddle._C_ops.stack(combine_98, 0) + del combine_98 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x49x12x32xf32, 3xi64) + reshape_103 = paddle._C_ops.reshape(transpose_71, stack_96) + del stack_96 + + # pd_op.matmul: (-1x49x384xf32) <- (-1x49x384xf32, 384x384xf32) + matmul_59 = paddle._C_ops.matmul(reshape_103, parameter_158, False, False) + del parameter_158 + + # pd_op.add: (-1x49x384xf32) <- (-1x49x384xf32, 384xf32) + add_80 = paddle._C_ops.add(matmul_59, parameter_157) + del parameter_157 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x49x384xf32, 4xi64) + reshape_104 = paddle._C_ops.reshape(add_80, full_int_array_38) + + # pd_op.reshape: (-1x2x2x7x7x384xf32) <- (-1x7x7x384xf32, 6xi64) + reshape_284 = paddle._C_ops.reshape(reshape_104, full_int_array_40) + + # pd_op.transpose: (-1x2x7x2x7x384xf32) <- (-1x2x2x7x7x384xf32) + transpose_72 = paddle._C_ops.transpose(reshape_284, [0, 1, 3, 2, 4, 5]) + del reshape_284 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x2x7x2x7x384xf32, 4xi64) + reshape_105 = paddle._C_ops.reshape(transpose_72, full_int_array_41) + + # pd_op.roll: (-1x14x14x384xf32) <- (-1x14x14x384xf32, 2xi64) + roll_11 = paddle._C_ops.roll(reshape_105, full_int_array_3, [1, 2]) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_99 = [slice_112, full_58, full_43] + del slice_112 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_97 = paddle._C_ops.stack(combine_99, 0) + del combine_99 + + # pd_op.reshape: (-1x196x384xf32) <- (-1x14x14x384xf32, 3xi64) + reshape_106 = paddle._C_ops.reshape(roll_11, stack_97) + del stack_97 + + # pd_op.full: (xf32) <- () + full_12 = paddle._C_ops.full( + [], + float("0.856522"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_135 = full_12 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_70 = paddle._C_ops.shape64(reshape_106) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_118 = paddle._C_ops.slice( + shape64_70, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_70 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_100 = [slice_118, full_39, full_39] + del slice_118 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_98 = paddle._C_ops.stack(combine_100, 0) + del combine_100 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_20 = paddle._C_ops.uniform( + stack_98, + paddle.float32, + full_40, + full_41, + 0, + paddle.framework._current_expected_place(), + ) + del stack_98 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_196 = paddle._C_ops.add(full_12, uniform_20) + del uniform_20 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_20 = paddle._C_ops.floor(add_196) + del add_196 + + # pd_op.divide: (-1x196x384xf32) <- (-1x196x384xf32, xf32) + divide_20 = paddle._C_ops.divide(reshape_106, full_12) + + # pd_op.multiply: (-1x196x384xf32) <- (-1x196x384xf32, -1x1x1xf32) + multiply_20 = paddle._C_ops.multiply(divide_20, floor_20) + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_81 = paddle._C_ops.add(add_76, multiply_20) + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_78, layer_norm_79, layer_norm_80 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_81, parameter_156, parameter_155, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_155, parameter_156 + + # pd_op.matmul: (-1x196x1536xf32) <- (-1x196x384xf32, 384x1536xf32) + matmul_60 = paddle._C_ops.matmul(layer_norm_78, parameter_154, False, False) + del parameter_154 + + # pd_op.add: (-1x196x1536xf32) <- (-1x196x1536xf32, 1536xf32) + add_82 = paddle._C_ops.add(matmul_60, parameter_153) + del parameter_153 + + # pd_op.gelu: (-1x196x1536xf32) <- (-1x196x1536xf32) + gelu_11 = paddle._C_ops.gelu(add_82, False) + + # pd_op.matmul: (-1x196x384xf32) <- (-1x196x1536xf32, 1536x384xf32) + matmul_61 = paddle._C_ops.matmul(gelu_11, parameter_152, False, False) + del parameter_152 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, 384xf32) + add_83 = paddle._C_ops.add(matmul_61, parameter_151) + del parameter_151 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_71 = paddle._C_ops.shape64(add_83) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_119 = paddle._C_ops.slice( + shape64_71, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_71 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_101 = [slice_119, full_39, full_39] + del slice_119 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_99 = paddle._C_ops.stack(combine_101, 0) + del combine_101 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_21 = paddle._C_ops.uniform( + stack_99, + paddle.float32, + full_40, + full_41, + 0, + paddle.framework._current_expected_place(), + ) + del stack_99 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_197 = paddle._C_ops.add(full_12, uniform_21) + del uniform_21 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_21 = paddle._C_ops.floor(add_197) + del add_197 + + # pd_op.divide: (-1x196x384xf32) <- (-1x196x384xf32, xf32) + divide_21 = paddle._C_ops.divide(add_83, full_12) + + # pd_op.multiply: (-1x196x384xf32) <- (-1x196x384xf32, -1x1x1xf32) + multiply_21 = paddle._C_ops.multiply(divide_21, floor_21) + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_84 = paddle._C_ops.add(add_81, multiply_21) + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_72 = paddle._C_ops.shape64(add_84) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_120 = paddle._C_ops.slice( + shape64_72, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_72 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_81, layer_norm_82, layer_norm_83 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_84, parameter_150, parameter_149, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_149, parameter_150 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_102 = [slice_120, full_55, full_55, full_43] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_100 = paddle._C_ops.stack(combine_102, 0) + del combine_102 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x196x384xf32, 4xi64) + reshape_107 = paddle._C_ops.reshape(layer_norm_81, stack_100) + del stack_100 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_73 = paddle._C_ops.shape64(reshape_107) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_121 = paddle._C_ops.slice( + shape64_73, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_73 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_103 = [slice_121, full_56, full_28, full_56, full_28, full_43] + del slice_121 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_101 = paddle._C_ops.stack(combine_103, 0) + del combine_103 + + # pd_op.reshape: (-1x2x7x2x7x384xf32) <- (-1x14x14x384xf32, 6xi64) + reshape_285 = paddle._C_ops.reshape(reshape_107, stack_101) + del stack_101 + + # pd_op.transpose: (-1x2x2x7x7x384xf32) <- (-1x2x7x2x7x384xf32) + transpose_73 = paddle._C_ops.transpose(reshape_285, [0, 1, 3, 2, 4, 5]) + del reshape_285 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x2x2x7x7x384xf32, 4xi64) + reshape_108 = paddle._C_ops.reshape(transpose_73, full_int_array_38) + + # pd_op.reshape: (-1x49x384xf32) <- (-1x7x7x384xf32, 3xi64) + reshape_109 = paddle._C_ops.reshape(reshape_108, full_int_array_39) + + # pd_op.shape64: (3xi64) <- (-1x49x384xf32) + shape64_74 = paddle._C_ops.shape64(reshape_109) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_122 = paddle._C_ops.slice( + shape64_74, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_74 + + # pd_op.matmul: (-1x49x1152xf32) <- (-1x49x384xf32, 384x1152xf32) + matmul_62 = paddle._C_ops.matmul(reshape_109, parameter_148, False, False) + del parameter_148 + + # pd_op.add: (-1x49x1152xf32) <- (-1x49x1152xf32, 1152xf32) + add_85 = paddle._C_ops.add(matmul_62, parameter_147) + del parameter_147 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_104 = [slice_122, full_29, full_30, full_57, full_31] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_102 = paddle._C_ops.stack(combine_104, 0) + del combine_104 + + # pd_op.reshape: (-1x49x3x12x32xf32) <- (-1x49x1152xf32, 5xi64) + reshape_286 = paddle._C_ops.reshape(add_85, stack_102) + del stack_102 + + # pd_op.transpose: (3x-1x12x49x32xf32) <- (-1x49x3x12x32xf32) + transpose_74 = paddle._C_ops.transpose(reshape_286, [2, 0, 3, 1, 4]) + del reshape_286 + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_123 = paddle._C_ops.slice( + transpose_74, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_124 = paddle._C_ops.slice( + transpose_74, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_12 = paddle._C_ops.slice( + transpose_74, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x12x49x32xf32) <- (-1x12x49x32xf32, 1xf32) + scale_12 = paddle._C_ops.scale(slice_123, full_0, float("0"), True) + del slice_123 + + # pd_op.transpose: (-1x12x32x49xf32) <- (-1x12x49x32xf32) + transpose_75 = paddle._C_ops.transpose(slice_124, [0, 1, 3, 2]) + del slice_124 + + # pd_op.matmul: (-1x12x49x49xf32) <- (-1x12x49x32xf32, -1x12x32x49xf32) + matmul_63 = paddle._C_ops.matmul(scale_12, transpose_75, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_110 = paddle._C_ops.reshape(data_25, full_int_array_12) + del data_25 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_12 = paddle._C_ops.index_select(data_26, reshape_110, 0) + del data_26 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_287 = paddle._C_ops.reshape(index_select_12, full_int_array_13) + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_76 = paddle._C_ops.transpose(reshape_287, [2, 0, 1]) + del reshape_287 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_18 = paddle._C_ops.unsqueeze(transpose_76, full_int_array_7) + + # pd_op.add: (-1x12x49x49xf32) <- (-1x12x49x49xf32, 1x12x49x49xf32) + add_198 = paddle._C_ops.add(matmul_63, unsqueeze_18) + + # pd_op.softmax: (-1x12x49x49xf32) <- (-1x12x49x49xf32) + softmax_12 = paddle._C_ops.softmax(add_198, -1) + del add_198 + + # pd_op.matmul: (-1x12x49x32xf32) <- (-1x12x49x49xf32, -1x12x49x32xf32) + matmul_136 = paddle._C_ops.matmul(softmax_12, slice_12, False, False) + + # pd_op.transpose: (-1x49x12x32xf32) <- (-1x12x49x32xf32) + transpose_77 = paddle._C_ops.transpose(matmul_136, [0, 2, 1, 3]) + del matmul_136 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_105 = [slice_122, full_29, full_43] + del slice_122 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_103 = paddle._C_ops.stack(combine_105, 0) + del combine_105 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x49x12x32xf32, 3xi64) + reshape_111 = paddle._C_ops.reshape(transpose_77, stack_103) + del stack_103 + + # pd_op.matmul: (-1x49x384xf32) <- (-1x49x384xf32, 384x384xf32) + matmul_64 = paddle._C_ops.matmul(reshape_111, parameter_146, False, False) + del parameter_146 + + # pd_op.add: (-1x49x384xf32) <- (-1x49x384xf32, 384xf32) + add_86 = paddle._C_ops.add(matmul_64, parameter_145) + del parameter_145 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x49x384xf32, 4xi64) + reshape_112 = paddle._C_ops.reshape(add_86, full_int_array_38) + + # pd_op.reshape: (-1x2x2x7x7x384xf32) <- (-1x7x7x384xf32, 6xi64) + reshape_288 = paddle._C_ops.reshape(reshape_112, full_int_array_40) + + # pd_op.transpose: (-1x2x7x2x7x384xf32) <- (-1x2x2x7x7x384xf32) + transpose_78 = paddle._C_ops.transpose(reshape_288, [0, 1, 3, 2, 4, 5]) + del reshape_288 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x2x7x2x7x384xf32, 4xi64) + reshape_113 = paddle._C_ops.reshape(transpose_78, full_int_array_41) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_106 = [slice_120, full_58, full_43] + del slice_120 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_104 = paddle._C_ops.stack(combine_106, 0) + del combine_106 + + # pd_op.reshape: (-1x196x384xf32) <- (-1x14x14x384xf32, 3xi64) + reshape_114 = paddle._C_ops.reshape(reshape_113, stack_104) + del stack_104 + + # pd_op.full: (xf32) <- () + full_13 = paddle._C_ops.full( + [], + float("0.843478"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_144 = full_13 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_75 = paddle._C_ops.shape64(reshape_114) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_125 = paddle._C_ops.slice( + shape64_75, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_75 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_107 = [slice_125, full_39, full_39] + del slice_125 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_105 = paddle._C_ops.stack(combine_107, 0) + del combine_107 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_22 = paddle._C_ops.uniform( + stack_105, + paddle.float32, + full_40, + full_41, + 0, + paddle.framework._current_expected_place(), + ) + del stack_105 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_199 = paddle._C_ops.add(full_13, uniform_22) + del uniform_22 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_22 = paddle._C_ops.floor(add_199) + del add_199 + + # pd_op.divide: (-1x196x384xf32) <- (-1x196x384xf32, xf32) + divide_22 = paddle._C_ops.divide(reshape_114, full_13) + + # pd_op.multiply: (-1x196x384xf32) <- (-1x196x384xf32, -1x1x1xf32) + multiply_22 = paddle._C_ops.multiply(divide_22, floor_22) + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_87 = paddle._C_ops.add(add_84, multiply_22) + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_84, layer_norm_85, layer_norm_86 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_87, parameter_144, parameter_143, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_143, parameter_144 + + # pd_op.matmul: (-1x196x1536xf32) <- (-1x196x384xf32, 384x1536xf32) + matmul_65 = paddle._C_ops.matmul(layer_norm_84, parameter_142, False, False) + del parameter_142 + + # pd_op.add: (-1x196x1536xf32) <- (-1x196x1536xf32, 1536xf32) + add_88 = paddle._C_ops.add(matmul_65, parameter_141) + del parameter_141 + + # pd_op.gelu: (-1x196x1536xf32) <- (-1x196x1536xf32) + gelu_12 = paddle._C_ops.gelu(add_88, False) + + # pd_op.matmul: (-1x196x384xf32) <- (-1x196x1536xf32, 1536x384xf32) + matmul_66 = paddle._C_ops.matmul(gelu_12, parameter_140, False, False) + del parameter_140 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, 384xf32) + add_89 = paddle._C_ops.add(matmul_66, parameter_139) + del parameter_139 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_76 = paddle._C_ops.shape64(add_89) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_126 = paddle._C_ops.slice( + shape64_76, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_76 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_108 = [slice_126, full_39, full_39] + del slice_126 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_106 = paddle._C_ops.stack(combine_108, 0) + del combine_108 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_23 = paddle._C_ops.uniform( + stack_106, + paddle.float32, + full_40, + full_41, + 0, + paddle.framework._current_expected_place(), + ) + del stack_106 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_200 = paddle._C_ops.add(full_13, uniform_23) + del uniform_23 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_23 = paddle._C_ops.floor(add_200) + del add_200 + + # pd_op.divide: (-1x196x384xf32) <- (-1x196x384xf32, xf32) + divide_23 = paddle._C_ops.divide(add_89, full_13) + + # pd_op.multiply: (-1x196x384xf32) <- (-1x196x384xf32, -1x1x1xf32) + multiply_23 = paddle._C_ops.multiply(divide_23, floor_23) + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_90 = paddle._C_ops.add(add_87, multiply_23) + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_77 = paddle._C_ops.shape64(add_90) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_127 = paddle._C_ops.slice( + shape64_77, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_77 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_87, layer_norm_88, layer_norm_89 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_90, parameter_138, parameter_137, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_137, parameter_138 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_109 = [slice_127, full_55, full_55, full_43] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_107 = paddle._C_ops.stack(combine_109, 0) + del combine_109 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x196x384xf32, 4xi64) + reshape_115 = paddle._C_ops.reshape(layer_norm_87, stack_107) + del stack_107 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_78 = paddle._C_ops.shape64(reshape_115) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_128 = paddle._C_ops.slice( + shape64_78, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_78 + + # pd_op.roll: (-1x14x14x384xf32) <- (-1x14x14x384xf32, 2xi64) + roll_12 = paddle._C_ops.roll(reshape_115, full_int_array_2, [1, 2]) + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_79 = paddle._C_ops.shape64(roll_12) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_129 = paddle._C_ops.slice( + shape64_79, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_79 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_110 = [slice_129, full_56, full_28, full_56, full_28, full_43] + del slice_129 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_108 = paddle._C_ops.stack(combine_110, 0) + del combine_110 + + # pd_op.reshape: (-1x2x7x2x7x384xf32) <- (-1x14x14x384xf32, 6xi64) + reshape_289 = paddle._C_ops.reshape(roll_12, stack_108) + del stack_108 + + # pd_op.transpose: (-1x2x2x7x7x384xf32) <- (-1x2x7x2x7x384xf32) + transpose_79 = paddle._C_ops.transpose(reshape_289, [0, 1, 3, 2, 4, 5]) + del reshape_289 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x2x2x7x7x384xf32, 4xi64) + reshape_116 = paddle._C_ops.reshape(transpose_79, full_int_array_38) + + # pd_op.reshape: (-1x49x384xf32) <- (-1x7x7x384xf32, 3xi64) + reshape_117 = paddle._C_ops.reshape(reshape_116, full_int_array_39) + + # pd_op.full: (1x14x14x1xf32) <- () + full_66 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__60 = paddle._C_ops.set_value_( + full_66, + full_int_array_16, + full_int_array_17, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_66 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__61 = paddle._C_ops.set_value_( + set_value__60, + full_int_array_19, + full_int_array_20, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__60 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__62 = paddle._C_ops.set_value_( + set_value__61, + full_int_array_21, + full_int_array_22, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__61 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__63 = paddle._C_ops.set_value_( + set_value__62, + full_int_array_23, + full_int_array_24, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__62 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__64 = paddle._C_ops.set_value_( + set_value__63, + full_int_array_17, + full_int_array_2, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__63 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__65 = paddle._C_ops.set_value_( + set_value__64, + full_int_array_20, + full_int_array_25, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__64 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__66 = paddle._C_ops.set_value_( + set_value__65, + full_int_array_26, + full_int_array_27, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__65 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__67 = paddle._C_ops.set_value_( + set_value__66, + full_int_array_24, + full_int_array_28, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__66 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__6 = paddle._C_ops.set_value_( + set_value__67, + full_int_array_2, + full_int_array_29, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__67 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_290 = paddle._C_ops.reshape(set_value__6, full_int_array_42) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_152 = paddle._C_ops.transpose(reshape_290, [0, 1, 3, 2, 4, 5]) + del reshape_290 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_291 = paddle._C_ops.reshape(transpose_152, full_int_array_31) + del transpose_152 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_292 = paddle._C_ops.reshape(reshape_291, full_int_array_32) + del reshape_291 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_55 = paddle._C_ops.unsqueeze(reshape_292, full_int_array_8) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_56 = paddle._C_ops.unsqueeze(reshape_292, full_int_array_0) + del reshape_292 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_6 = paddle._C_ops.subtract(unsqueeze_55, unsqueeze_56) + del unsqueeze_55, unsqueeze_56 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_6 = paddle._C_ops.not_equal(subtract_6, full_34) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_12 = paddle._C_ops.where(not_equal_6, full_60, subtract_6) + del not_equal_6, subtract_6 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_6 = paddle._C_ops.equal(where_12, full_34) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_13 = paddle._C_ops.where(equal_6, full_61, where_12) + del equal_6, where_12 + + # pd_op.shape64: (3xi64) <- (-1x49x384xf32) + shape64_80 = paddle._C_ops.shape64(reshape_117) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_130 = paddle._C_ops.slice( + shape64_80, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_80 + + # pd_op.matmul: (-1x49x1152xf32) <- (-1x49x384xf32, 384x1152xf32) + matmul_67 = paddle._C_ops.matmul(reshape_117, parameter_136, False, False) + del parameter_136 + + # pd_op.add: (-1x49x1152xf32) <- (-1x49x1152xf32, 1152xf32) + add_91 = paddle._C_ops.add(matmul_67, parameter_135) + del parameter_135 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_111 = [slice_130, full_29, full_30, full_57, full_31] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_109 = paddle._C_ops.stack(combine_111, 0) + del combine_111 + + # pd_op.reshape: (-1x49x3x12x32xf32) <- (-1x49x1152xf32, 5xi64) + reshape_293 = paddle._C_ops.reshape(add_91, stack_109) + del stack_109 + + # pd_op.transpose: (3x-1x12x49x32xf32) <- (-1x49x3x12x32xf32) + transpose_80 = paddle._C_ops.transpose(reshape_293, [2, 0, 3, 1, 4]) + del reshape_293 + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_131 = paddle._C_ops.slice( + transpose_80, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_132 = paddle._C_ops.slice( + transpose_80, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_13 = paddle._C_ops.slice( + transpose_80, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x12x49x32xf32) <- (-1x12x49x32xf32, 1xf32) + scale_13 = paddle._C_ops.scale(slice_131, full_0, float("0"), True) + del slice_131 + + # pd_op.transpose: (-1x12x32x49xf32) <- (-1x12x49x32xf32) + transpose_81 = paddle._C_ops.transpose(slice_132, [0, 1, 3, 2]) + del slice_132 + + # pd_op.matmul: (-1x12x49x49xf32) <- (-1x12x49x32xf32, -1x12x32x49xf32) + matmul_68 = paddle._C_ops.matmul(scale_13, transpose_81, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_118 = paddle._C_ops.reshape(data_27, full_int_array_12) + del data_27 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_13 = paddle._C_ops.index_select(data_28, reshape_118, 0) + del data_28 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_294 = paddle._C_ops.reshape(index_select_13, full_int_array_13) + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_82 = paddle._C_ops.transpose(reshape_294, [2, 0, 1]) + del reshape_294 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_19 = paddle._C_ops.unsqueeze(transpose_82, full_int_array_7) + + # pd_op.add: (-1x12x49x49xf32) <- (-1x12x49x49xf32, 1x12x49x49xf32) + add_92 = paddle._C_ops.add(matmul_68, unsqueeze_19) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_6 = paddle._C_ops.floor_divide(slice_130, full_62) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_112 = [floor_divide_6, full_46, full_57, full_29, full_29] + del floor_divide_6 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_110 = paddle._C_ops.stack(combine_112, 0) + del combine_112 + + # pd_op.reshape: (-1x4x12x49x49xf32) <- (-1x12x49x49xf32, 5xi64) + reshape_119 = paddle._C_ops.reshape(add_92, stack_110) + del stack_110 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_57 = paddle._C_ops.unsqueeze(where_13, full_int_array_8) + del where_13 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_20 = paddle._C_ops.unsqueeze(unsqueeze_57, full_int_array_7) + del unsqueeze_57 + + # pd_op.add: (-1x4x12x49x49xf32) <- (-1x4x12x49x49xf32, 1x4x1x49x49xf32) + add_93 = paddle._C_ops.add(reshape_119, unsqueeze_20) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_113 = [slice_130, full_57, full_29, full_29] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_111 = paddle._C_ops.stack(combine_113, 0) + del combine_113 + + # pd_op.reshape: (-1x12x49x49xf32) <- (-1x4x12x49x49xf32, 4xi64) + reshape_295 = paddle._C_ops.reshape(add_93, stack_111) + del stack_111 + + # pd_op.softmax: (-1x12x49x49xf32) <- (-1x12x49x49xf32) + softmax_13 = paddle._C_ops.softmax(reshape_295, -1) + del reshape_295 + + # pd_op.matmul: (-1x12x49x32xf32) <- (-1x12x49x49xf32, -1x12x49x32xf32) + matmul_137 = paddle._C_ops.matmul(softmax_13, slice_13, False, False) + + # pd_op.transpose: (-1x49x12x32xf32) <- (-1x12x49x32xf32) + transpose_83 = paddle._C_ops.transpose(matmul_137, [0, 2, 1, 3]) + del matmul_137 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_114 = [slice_130, full_29, full_43] + del slice_130 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_112 = paddle._C_ops.stack(combine_114, 0) + del combine_114 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x49x12x32xf32, 3xi64) + reshape_120 = paddle._C_ops.reshape(transpose_83, stack_112) + del stack_112 + + # pd_op.matmul: (-1x49x384xf32) <- (-1x49x384xf32, 384x384xf32) + matmul_69 = paddle._C_ops.matmul(reshape_120, parameter_134, False, False) + del parameter_134 + + # pd_op.add: (-1x49x384xf32) <- (-1x49x384xf32, 384xf32) + add_94 = paddle._C_ops.add(matmul_69, parameter_133) + del parameter_133 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x49x384xf32, 4xi64) + reshape_121 = paddle._C_ops.reshape(add_94, full_int_array_38) + + # pd_op.reshape: (-1x2x2x7x7x384xf32) <- (-1x7x7x384xf32, 6xi64) + reshape_296 = paddle._C_ops.reshape(reshape_121, full_int_array_40) + + # pd_op.transpose: (-1x2x7x2x7x384xf32) <- (-1x2x2x7x7x384xf32) + transpose_84 = paddle._C_ops.transpose(reshape_296, [0, 1, 3, 2, 4, 5]) + del reshape_296 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x2x7x2x7x384xf32, 4xi64) + reshape_122 = paddle._C_ops.reshape(transpose_84, full_int_array_41) + + # pd_op.roll: (-1x14x14x384xf32) <- (-1x14x14x384xf32, 2xi64) + roll_13 = paddle._C_ops.roll(reshape_122, full_int_array_3, [1, 2]) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_115 = [slice_127, full_58, full_43] + del slice_127 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_113 = paddle._C_ops.stack(combine_115, 0) + del combine_115 + + # pd_op.reshape: (-1x196x384xf32) <- (-1x14x14x384xf32, 3xi64) + reshape_123 = paddle._C_ops.reshape(roll_13, stack_113) + del stack_113 + + # pd_op.full: (xf32) <- () + full_14 = paddle._C_ops.full( + [], + float("0.830435"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_155 = full_14 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_81 = paddle._C_ops.shape64(reshape_123) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_133 = paddle._C_ops.slice( + shape64_81, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_81 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_116 = [slice_133, full_39, full_39] + del slice_133 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_114 = paddle._C_ops.stack(combine_116, 0) + del combine_116 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_24 = paddle._C_ops.uniform( + stack_114, + paddle.float32, + full_40, + full_41, + 0, + paddle.framework._current_expected_place(), + ) + del stack_114 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_201 = paddle._C_ops.add(full_14, uniform_24) + del uniform_24 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_24 = paddle._C_ops.floor(add_201) + del add_201 + + # pd_op.divide: (-1x196x384xf32) <- (-1x196x384xf32, xf32) + divide_24 = paddle._C_ops.divide(reshape_123, full_14) + + # pd_op.multiply: (-1x196x384xf32) <- (-1x196x384xf32, -1x1x1xf32) + multiply_24 = paddle._C_ops.multiply(divide_24, floor_24) + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_95 = paddle._C_ops.add(add_90, multiply_24) + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_90, layer_norm_91, layer_norm_92 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_95, parameter_132, parameter_131, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_131, parameter_132 + + # pd_op.matmul: (-1x196x1536xf32) <- (-1x196x384xf32, 384x1536xf32) + matmul_70 = paddle._C_ops.matmul(layer_norm_90, parameter_130, False, False) + del parameter_130 + + # pd_op.add: (-1x196x1536xf32) <- (-1x196x1536xf32, 1536xf32) + add_96 = paddle._C_ops.add(matmul_70, parameter_129) + del parameter_129 + + # pd_op.gelu: (-1x196x1536xf32) <- (-1x196x1536xf32) + gelu_13 = paddle._C_ops.gelu(add_96, False) + + # pd_op.matmul: (-1x196x384xf32) <- (-1x196x1536xf32, 1536x384xf32) + matmul_71 = paddle._C_ops.matmul(gelu_13, parameter_128, False, False) + del parameter_128 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, 384xf32) + add_97 = paddle._C_ops.add(matmul_71, parameter_127) + del parameter_127 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_82 = paddle._C_ops.shape64(add_97) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_134 = paddle._C_ops.slice( + shape64_82, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_82 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_117 = [slice_134, full_39, full_39] + del slice_134 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_115 = paddle._C_ops.stack(combine_117, 0) + del combine_117 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_25 = paddle._C_ops.uniform( + stack_115, + paddle.float32, + full_40, + full_41, + 0, + paddle.framework._current_expected_place(), + ) + del stack_115 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_202 = paddle._C_ops.add(full_14, uniform_25) + del uniform_25 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_25 = paddle._C_ops.floor(add_202) + del add_202 + + # pd_op.divide: (-1x196x384xf32) <- (-1x196x384xf32, xf32) + divide_25 = paddle._C_ops.divide(add_97, full_14) + + # pd_op.multiply: (-1x196x384xf32) <- (-1x196x384xf32, -1x1x1xf32) + multiply_25 = paddle._C_ops.multiply(divide_25, floor_25) + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_98 = paddle._C_ops.add(add_95, multiply_25) + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_83 = paddle._C_ops.shape64(add_98) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_135 = paddle._C_ops.slice( + shape64_83, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_83 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_93, layer_norm_94, layer_norm_95 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_98, parameter_126, parameter_125, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_125, parameter_126 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_118 = [slice_135, full_55, full_55, full_43] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_116 = paddle._C_ops.stack(combine_118, 0) + del combine_118 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x196x384xf32, 4xi64) + reshape_124 = paddle._C_ops.reshape(layer_norm_93, stack_116) + del stack_116 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_84 = paddle._C_ops.shape64(reshape_124) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_136 = paddle._C_ops.slice( + shape64_84, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_84 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_119 = [slice_136, full_56, full_28, full_56, full_28, full_43] + del slice_136 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_117 = paddle._C_ops.stack(combine_119, 0) + del combine_119 + + # pd_op.reshape: (-1x2x7x2x7x384xf32) <- (-1x14x14x384xf32, 6xi64) + reshape_297 = paddle._C_ops.reshape(reshape_124, stack_117) + del stack_117 + + # pd_op.transpose: (-1x2x2x7x7x384xf32) <- (-1x2x7x2x7x384xf32) + transpose_85 = paddle._C_ops.transpose(reshape_297, [0, 1, 3, 2, 4, 5]) + del reshape_297 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x2x2x7x7x384xf32, 4xi64) + reshape_125 = paddle._C_ops.reshape(transpose_85, full_int_array_38) + + # pd_op.reshape: (-1x49x384xf32) <- (-1x7x7x384xf32, 3xi64) + reshape_126 = paddle._C_ops.reshape(reshape_125, full_int_array_39) + + # pd_op.shape64: (3xi64) <- (-1x49x384xf32) + shape64_85 = paddle._C_ops.shape64(reshape_126) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_137 = paddle._C_ops.slice( + shape64_85, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_85 + + # pd_op.matmul: (-1x49x1152xf32) <- (-1x49x384xf32, 384x1152xf32) + matmul_72 = paddle._C_ops.matmul(reshape_126, parameter_124, False, False) + del parameter_124 + + # pd_op.add: (-1x49x1152xf32) <- (-1x49x1152xf32, 1152xf32) + add_99 = paddle._C_ops.add(matmul_72, parameter_123) + del parameter_123 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_120 = [slice_137, full_29, full_30, full_57, full_31] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_118 = paddle._C_ops.stack(combine_120, 0) + del combine_120 + + # pd_op.reshape: (-1x49x3x12x32xf32) <- (-1x49x1152xf32, 5xi64) + reshape_298 = paddle._C_ops.reshape(add_99, stack_118) + del stack_118 + + # pd_op.transpose: (3x-1x12x49x32xf32) <- (-1x49x3x12x32xf32) + transpose_86 = paddle._C_ops.transpose(reshape_298, [2, 0, 3, 1, 4]) + del reshape_298 + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_138 = paddle._C_ops.slice( + transpose_86, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_139 = paddle._C_ops.slice( + transpose_86, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_14 = paddle._C_ops.slice( + transpose_86, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x12x49x32xf32) <- (-1x12x49x32xf32, 1xf32) + scale_14 = paddle._C_ops.scale(slice_138, full_0, float("0"), True) + del slice_138 + + # pd_op.transpose: (-1x12x32x49xf32) <- (-1x12x49x32xf32) + transpose_87 = paddle._C_ops.transpose(slice_139, [0, 1, 3, 2]) + del slice_139 + + # pd_op.matmul: (-1x12x49x49xf32) <- (-1x12x49x32xf32, -1x12x32x49xf32) + matmul_73 = paddle._C_ops.matmul(scale_14, transpose_87, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_127 = paddle._C_ops.reshape(data_29, full_int_array_12) + del data_29 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_14 = paddle._C_ops.index_select(data_30, reshape_127, 0) + del data_30 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_299 = paddle._C_ops.reshape(index_select_14, full_int_array_13) + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_88 = paddle._C_ops.transpose(reshape_299, [2, 0, 1]) + del reshape_299 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_21 = paddle._C_ops.unsqueeze(transpose_88, full_int_array_7) + + # pd_op.add: (-1x12x49x49xf32) <- (-1x12x49x49xf32, 1x12x49x49xf32) + add_203 = paddle._C_ops.add(matmul_73, unsqueeze_21) + + # pd_op.softmax: (-1x12x49x49xf32) <- (-1x12x49x49xf32) + softmax_14 = paddle._C_ops.softmax(add_203, -1) + del add_203 + + # pd_op.matmul: (-1x12x49x32xf32) <- (-1x12x49x49xf32, -1x12x49x32xf32) + matmul_138 = paddle._C_ops.matmul(softmax_14, slice_14, False, False) + + # pd_op.transpose: (-1x49x12x32xf32) <- (-1x12x49x32xf32) + transpose_89 = paddle._C_ops.transpose(matmul_138, [0, 2, 1, 3]) + del matmul_138 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_121 = [slice_137, full_29, full_43] + del slice_137 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_119 = paddle._C_ops.stack(combine_121, 0) + del combine_121 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x49x12x32xf32, 3xi64) + reshape_128 = paddle._C_ops.reshape(transpose_89, stack_119) + del stack_119 + + # pd_op.matmul: (-1x49x384xf32) <- (-1x49x384xf32, 384x384xf32) + matmul_74 = paddle._C_ops.matmul(reshape_128, parameter_122, False, False) + del parameter_122 + + # pd_op.add: (-1x49x384xf32) <- (-1x49x384xf32, 384xf32) + add_100 = paddle._C_ops.add(matmul_74, parameter_121) + del parameter_121 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x49x384xf32, 4xi64) + reshape_129 = paddle._C_ops.reshape(add_100, full_int_array_38) + + # pd_op.reshape: (-1x2x2x7x7x384xf32) <- (-1x7x7x384xf32, 6xi64) + reshape_300 = paddle._C_ops.reshape(reshape_129, full_int_array_40) + + # pd_op.transpose: (-1x2x7x2x7x384xf32) <- (-1x2x2x7x7x384xf32) + transpose_90 = paddle._C_ops.transpose(reshape_300, [0, 1, 3, 2, 4, 5]) + del reshape_300 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x2x7x2x7x384xf32, 4xi64) + reshape_130 = paddle._C_ops.reshape(transpose_90, full_int_array_41) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_122 = [slice_135, full_58, full_43] + del slice_135 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_120 = paddle._C_ops.stack(combine_122, 0) + del combine_122 + + # pd_op.reshape: (-1x196x384xf32) <- (-1x14x14x384xf32, 3xi64) + reshape_131 = paddle._C_ops.reshape(reshape_130, stack_120) + del stack_120 + + # pd_op.full: (xf32) <- () + full_15 = paddle._C_ops.full( + [], + float("0.817391"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_164 = full_15 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_86 = paddle._C_ops.shape64(reshape_131) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_140 = paddle._C_ops.slice( + shape64_86, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_86 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_123 = [slice_140, full_39, full_39] + del slice_140 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_121 = paddle._C_ops.stack(combine_123, 0) + del combine_123 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_26 = paddle._C_ops.uniform( + stack_121, + paddle.float32, + full_40, + full_41, + 0, + paddle.framework._current_expected_place(), + ) + del stack_121 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_204 = paddle._C_ops.add(full_15, uniform_26) + del uniform_26 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_26 = paddle._C_ops.floor(add_204) + del add_204 + + # pd_op.divide: (-1x196x384xf32) <- (-1x196x384xf32, xf32) + divide_26 = paddle._C_ops.divide(reshape_131, full_15) + + # pd_op.multiply: (-1x196x384xf32) <- (-1x196x384xf32, -1x1x1xf32) + multiply_26 = paddle._C_ops.multiply(divide_26, floor_26) + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_101 = paddle._C_ops.add(add_98, multiply_26) + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_96, layer_norm_97, layer_norm_98 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_101, parameter_120, parameter_119, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_119, parameter_120 + + # pd_op.matmul: (-1x196x1536xf32) <- (-1x196x384xf32, 384x1536xf32) + matmul_75 = paddle._C_ops.matmul(layer_norm_96, parameter_118, False, False) + del parameter_118 + + # pd_op.add: (-1x196x1536xf32) <- (-1x196x1536xf32, 1536xf32) + add_102 = paddle._C_ops.add(matmul_75, parameter_117) + del parameter_117 + + # pd_op.gelu: (-1x196x1536xf32) <- (-1x196x1536xf32) + gelu_14 = paddle._C_ops.gelu(add_102, False) + + # pd_op.matmul: (-1x196x384xf32) <- (-1x196x1536xf32, 1536x384xf32) + matmul_76 = paddle._C_ops.matmul(gelu_14, parameter_116, False, False) + del parameter_116 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, 384xf32) + add_103 = paddle._C_ops.add(matmul_76, parameter_115) + del parameter_115 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_87 = paddle._C_ops.shape64(add_103) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_141 = paddle._C_ops.slice( + shape64_87, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_87 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_124 = [slice_141, full_39, full_39] + del slice_141 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_122 = paddle._C_ops.stack(combine_124, 0) + del combine_124 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_27 = paddle._C_ops.uniform( + stack_122, + paddle.float32, + full_40, + full_41, + 0, + paddle.framework._current_expected_place(), + ) + del stack_122 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_205 = paddle._C_ops.add(full_15, uniform_27) + del uniform_27 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_27 = paddle._C_ops.floor(add_205) + del add_205 + + # pd_op.divide: (-1x196x384xf32) <- (-1x196x384xf32, xf32) + divide_27 = paddle._C_ops.divide(add_103, full_15) + + # pd_op.multiply: (-1x196x384xf32) <- (-1x196x384xf32, -1x1x1xf32) + multiply_27 = paddle._C_ops.multiply(divide_27, floor_27) + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_104 = paddle._C_ops.add(add_101, multiply_27) + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_88 = paddle._C_ops.shape64(add_104) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_142 = paddle._C_ops.slice( + shape64_88, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_88 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_99, layer_norm_100, layer_norm_101 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_104, parameter_114, parameter_113, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_113, parameter_114 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_125 = [slice_142, full_55, full_55, full_43] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_123 = paddle._C_ops.stack(combine_125, 0) + del combine_125 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x196x384xf32, 4xi64) + reshape_132 = paddle._C_ops.reshape(layer_norm_99, stack_123) + del stack_123 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_89 = paddle._C_ops.shape64(reshape_132) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_143 = paddle._C_ops.slice( + shape64_89, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_89 + + # pd_op.roll: (-1x14x14x384xf32) <- (-1x14x14x384xf32, 2xi64) + roll_14 = paddle._C_ops.roll(reshape_132, full_int_array_2, [1, 2]) + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_90 = paddle._C_ops.shape64(roll_14) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_144 = paddle._C_ops.slice( + shape64_90, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_90 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_126 = [slice_144, full_56, full_28, full_56, full_28, full_43] + del slice_144 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_124 = paddle._C_ops.stack(combine_126, 0) + del combine_126 + + # pd_op.reshape: (-1x2x7x2x7x384xf32) <- (-1x14x14x384xf32, 6xi64) + reshape_301 = paddle._C_ops.reshape(roll_14, stack_124) + del stack_124 + + # pd_op.transpose: (-1x2x2x7x7x384xf32) <- (-1x2x7x2x7x384xf32) + transpose_91 = paddle._C_ops.transpose(reshape_301, [0, 1, 3, 2, 4, 5]) + del reshape_301 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x2x2x7x7x384xf32, 4xi64) + reshape_133 = paddle._C_ops.reshape(transpose_91, full_int_array_38) + + # pd_op.reshape: (-1x49x384xf32) <- (-1x7x7x384xf32, 3xi64) + reshape_134 = paddle._C_ops.reshape(reshape_133, full_int_array_39) + + # pd_op.full: (1x14x14x1xf32) <- () + full_67 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__68 = paddle._C_ops.set_value_( + full_67, + full_int_array_16, + full_int_array_17, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_67 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__69 = paddle._C_ops.set_value_( + set_value__68, + full_int_array_19, + full_int_array_20, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__68 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__70 = paddle._C_ops.set_value_( + set_value__69, + full_int_array_21, + full_int_array_22, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__69 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__71 = paddle._C_ops.set_value_( + set_value__70, + full_int_array_23, + full_int_array_24, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__70 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__72 = paddle._C_ops.set_value_( + set_value__71, + full_int_array_17, + full_int_array_2, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__71 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__73 = paddle._C_ops.set_value_( + set_value__72, + full_int_array_20, + full_int_array_25, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__72 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__74 = paddle._C_ops.set_value_( + set_value__73, + full_int_array_26, + full_int_array_27, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__73 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__75 = paddle._C_ops.set_value_( + set_value__74, + full_int_array_24, + full_int_array_28, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__74 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__7 = paddle._C_ops.set_value_( + set_value__75, + full_int_array_2, + full_int_array_29, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__75 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_302 = paddle._C_ops.reshape(set_value__7, full_int_array_42) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_153 = paddle._C_ops.transpose(reshape_302, [0, 1, 3, 2, 4, 5]) + del reshape_302 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_303 = paddle._C_ops.reshape(transpose_153, full_int_array_31) + del transpose_153 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_304 = paddle._C_ops.reshape(reshape_303, full_int_array_32) + del reshape_303 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_58 = paddle._C_ops.unsqueeze(reshape_304, full_int_array_8) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_59 = paddle._C_ops.unsqueeze(reshape_304, full_int_array_0) + del reshape_304 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_7 = paddle._C_ops.subtract(unsqueeze_58, unsqueeze_59) + del unsqueeze_58, unsqueeze_59 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_7 = paddle._C_ops.not_equal(subtract_7, full_34) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_14 = paddle._C_ops.where(not_equal_7, full_60, subtract_7) + del not_equal_7, subtract_7 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_7 = paddle._C_ops.equal(where_14, full_34) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_15 = paddle._C_ops.where(equal_7, full_61, where_14) + del equal_7, where_14 + + # pd_op.shape64: (3xi64) <- (-1x49x384xf32) + shape64_91 = paddle._C_ops.shape64(reshape_134) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_145 = paddle._C_ops.slice( + shape64_91, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_91 + + # pd_op.matmul: (-1x49x1152xf32) <- (-1x49x384xf32, 384x1152xf32) + matmul_77 = paddle._C_ops.matmul(reshape_134, parameter_112, False, False) + del parameter_112 + + # pd_op.add: (-1x49x1152xf32) <- (-1x49x1152xf32, 1152xf32) + add_105 = paddle._C_ops.add(matmul_77, parameter_111) + del parameter_111 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_127 = [slice_145, full_29, full_30, full_57, full_31] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_125 = paddle._C_ops.stack(combine_127, 0) + del combine_127 + + # pd_op.reshape: (-1x49x3x12x32xf32) <- (-1x49x1152xf32, 5xi64) + reshape_305 = paddle._C_ops.reshape(add_105, stack_125) + del stack_125 + + # pd_op.transpose: (3x-1x12x49x32xf32) <- (-1x49x3x12x32xf32) + transpose_92 = paddle._C_ops.transpose(reshape_305, [2, 0, 3, 1, 4]) + del reshape_305 + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_146 = paddle._C_ops.slice( + transpose_92, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_147 = paddle._C_ops.slice( + transpose_92, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_15 = paddle._C_ops.slice( + transpose_92, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x12x49x32xf32) <- (-1x12x49x32xf32, 1xf32) + scale_15 = paddle._C_ops.scale(slice_146, full_0, float("0"), True) + del slice_146 + + # pd_op.transpose: (-1x12x32x49xf32) <- (-1x12x49x32xf32) + transpose_93 = paddle._C_ops.transpose(slice_147, [0, 1, 3, 2]) + del slice_147 + + # pd_op.matmul: (-1x12x49x49xf32) <- (-1x12x49x32xf32, -1x12x32x49xf32) + matmul_78 = paddle._C_ops.matmul(scale_15, transpose_93, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_135 = paddle._C_ops.reshape(data_31, full_int_array_12) + del data_31 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_15 = paddle._C_ops.index_select(data_32, reshape_135, 0) + del data_32 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_306 = paddle._C_ops.reshape(index_select_15, full_int_array_13) + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_94 = paddle._C_ops.transpose(reshape_306, [2, 0, 1]) + del reshape_306 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_22 = paddle._C_ops.unsqueeze(transpose_94, full_int_array_7) + + # pd_op.add: (-1x12x49x49xf32) <- (-1x12x49x49xf32, 1x12x49x49xf32) + add_106 = paddle._C_ops.add(matmul_78, unsqueeze_22) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_7 = paddle._C_ops.floor_divide(slice_145, full_62) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_128 = [floor_divide_7, full_46, full_57, full_29, full_29] + del floor_divide_7 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_126 = paddle._C_ops.stack(combine_128, 0) + del combine_128 + + # pd_op.reshape: (-1x4x12x49x49xf32) <- (-1x12x49x49xf32, 5xi64) + reshape_136 = paddle._C_ops.reshape(add_106, stack_126) + del stack_126 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_60 = paddle._C_ops.unsqueeze(where_15, full_int_array_8) + del where_15 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_23 = paddle._C_ops.unsqueeze(unsqueeze_60, full_int_array_7) + del unsqueeze_60 + + # pd_op.add: (-1x4x12x49x49xf32) <- (-1x4x12x49x49xf32, 1x4x1x49x49xf32) + add_107 = paddle._C_ops.add(reshape_136, unsqueeze_23) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_129 = [slice_145, full_57, full_29, full_29] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_127 = paddle._C_ops.stack(combine_129, 0) + del combine_129 + + # pd_op.reshape: (-1x12x49x49xf32) <- (-1x4x12x49x49xf32, 4xi64) + reshape_307 = paddle._C_ops.reshape(add_107, stack_127) + del stack_127 + + # pd_op.softmax: (-1x12x49x49xf32) <- (-1x12x49x49xf32) + softmax_15 = paddle._C_ops.softmax(reshape_307, -1) + del reshape_307 + + # pd_op.matmul: (-1x12x49x32xf32) <- (-1x12x49x49xf32, -1x12x49x32xf32) + matmul_139 = paddle._C_ops.matmul(softmax_15, slice_15, False, False) + + # pd_op.transpose: (-1x49x12x32xf32) <- (-1x12x49x32xf32) + transpose_95 = paddle._C_ops.transpose(matmul_139, [0, 2, 1, 3]) + del matmul_139 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_130 = [slice_145, full_29, full_43] + del slice_145 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_128 = paddle._C_ops.stack(combine_130, 0) + del combine_130 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x49x12x32xf32, 3xi64) + reshape_137 = paddle._C_ops.reshape(transpose_95, stack_128) + del stack_128 + + # pd_op.matmul: (-1x49x384xf32) <- (-1x49x384xf32, 384x384xf32) + matmul_79 = paddle._C_ops.matmul(reshape_137, parameter_110, False, False) + del parameter_110 + + # pd_op.add: (-1x49x384xf32) <- (-1x49x384xf32, 384xf32) + add_108 = paddle._C_ops.add(matmul_79, parameter_109) + del parameter_109 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x49x384xf32, 4xi64) + reshape_138 = paddle._C_ops.reshape(add_108, full_int_array_38) + + # pd_op.reshape: (-1x2x2x7x7x384xf32) <- (-1x7x7x384xf32, 6xi64) + reshape_308 = paddle._C_ops.reshape(reshape_138, full_int_array_40) + + # pd_op.transpose: (-1x2x7x2x7x384xf32) <- (-1x2x2x7x7x384xf32) + transpose_96 = paddle._C_ops.transpose(reshape_308, [0, 1, 3, 2, 4, 5]) + del reshape_308 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x2x7x2x7x384xf32, 4xi64) + reshape_139 = paddle._C_ops.reshape(transpose_96, full_int_array_41) + + # pd_op.roll: (-1x14x14x384xf32) <- (-1x14x14x384xf32, 2xi64) + roll_15 = paddle._C_ops.roll(reshape_139, full_int_array_3, [1, 2]) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_131 = [slice_142, full_58, full_43] + del slice_142 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_129 = paddle._C_ops.stack(combine_131, 0) + del combine_131 + + # pd_op.reshape: (-1x196x384xf32) <- (-1x14x14x384xf32, 3xi64) + reshape_140 = paddle._C_ops.reshape(roll_15, stack_129) + del stack_129 + + # pd_op.full: (xf32) <- () + full_16 = paddle._C_ops.full( + [], + float("0.804348"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_175 = full_16 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_92 = paddle._C_ops.shape64(reshape_140) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_148 = paddle._C_ops.slice( + shape64_92, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_92 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_132 = [slice_148, full_39, full_39] + del slice_148 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_130 = paddle._C_ops.stack(combine_132, 0) + del combine_132 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_28 = paddle._C_ops.uniform( + stack_130, + paddle.float32, + full_40, + full_41, + 0, + paddle.framework._current_expected_place(), + ) + del stack_130 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_206 = paddle._C_ops.add(full_16, uniform_28) + del uniform_28 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_28 = paddle._C_ops.floor(add_206) + del add_206 + + # pd_op.divide: (-1x196x384xf32) <- (-1x196x384xf32, xf32) + divide_28 = paddle._C_ops.divide(reshape_140, full_16) + + # pd_op.multiply: (-1x196x384xf32) <- (-1x196x384xf32, -1x1x1xf32) + multiply_28 = paddle._C_ops.multiply(divide_28, floor_28) + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_109 = paddle._C_ops.add(add_104, multiply_28) + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_102, layer_norm_103, layer_norm_104 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_109, parameter_108, parameter_107, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_107, parameter_108 + + # pd_op.matmul: (-1x196x1536xf32) <- (-1x196x384xf32, 384x1536xf32) + matmul_80 = paddle._C_ops.matmul(layer_norm_102, parameter_106, False, False) + del parameter_106 + + # pd_op.add: (-1x196x1536xf32) <- (-1x196x1536xf32, 1536xf32) + add_110 = paddle._C_ops.add(matmul_80, parameter_105) + del parameter_105 + + # pd_op.gelu: (-1x196x1536xf32) <- (-1x196x1536xf32) + gelu_15 = paddle._C_ops.gelu(add_110, False) + + # pd_op.matmul: (-1x196x384xf32) <- (-1x196x1536xf32, 1536x384xf32) + matmul_81 = paddle._C_ops.matmul(gelu_15, parameter_104, False, False) + del parameter_104 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, 384xf32) + add_111 = paddle._C_ops.add(matmul_81, parameter_103) + del parameter_103 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_93 = paddle._C_ops.shape64(add_111) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_149 = paddle._C_ops.slice( + shape64_93, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_93 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_133 = [slice_149, full_39, full_39] + del slice_149 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_131 = paddle._C_ops.stack(combine_133, 0) + del combine_133 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_29 = paddle._C_ops.uniform( + stack_131, + paddle.float32, + full_40, + full_41, + 0, + paddle.framework._current_expected_place(), + ) + del stack_131 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_207 = paddle._C_ops.add(full_16, uniform_29) + del uniform_29 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_29 = paddle._C_ops.floor(add_207) + del add_207 + + # pd_op.divide: (-1x196x384xf32) <- (-1x196x384xf32, xf32) + divide_29 = paddle._C_ops.divide(add_111, full_16) + + # pd_op.multiply: (-1x196x384xf32) <- (-1x196x384xf32, -1x1x1xf32) + multiply_29 = paddle._C_ops.multiply(divide_29, floor_29) + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_112 = paddle._C_ops.add(add_109, multiply_29) + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_94 = paddle._C_ops.shape64(add_112) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_150 = paddle._C_ops.slice( + shape64_94, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_94 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_105, layer_norm_106, layer_norm_107 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_112, parameter_102, parameter_101, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_101, parameter_102 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_134 = [slice_150, full_55, full_55, full_43] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_132 = paddle._C_ops.stack(combine_134, 0) + del combine_134 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x196x384xf32, 4xi64) + reshape_141 = paddle._C_ops.reshape(layer_norm_105, stack_132) + del stack_132 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_95 = paddle._C_ops.shape64(reshape_141) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_151 = paddle._C_ops.slice( + shape64_95, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_95 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_135 = [slice_151, full_56, full_28, full_56, full_28, full_43] + del slice_151 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_133 = paddle._C_ops.stack(combine_135, 0) + del combine_135 + + # pd_op.reshape: (-1x2x7x2x7x384xf32) <- (-1x14x14x384xf32, 6xi64) + reshape_309 = paddle._C_ops.reshape(reshape_141, stack_133) + del stack_133 + + # pd_op.transpose: (-1x2x2x7x7x384xf32) <- (-1x2x7x2x7x384xf32) + transpose_97 = paddle._C_ops.transpose(reshape_309, [0, 1, 3, 2, 4, 5]) + del reshape_309 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x2x2x7x7x384xf32, 4xi64) + reshape_142 = paddle._C_ops.reshape(transpose_97, full_int_array_38) + + # pd_op.reshape: (-1x49x384xf32) <- (-1x7x7x384xf32, 3xi64) + reshape_143 = paddle._C_ops.reshape(reshape_142, full_int_array_39) + + # pd_op.shape64: (3xi64) <- (-1x49x384xf32) + shape64_96 = paddle._C_ops.shape64(reshape_143) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_152 = paddle._C_ops.slice( + shape64_96, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_96 + + # pd_op.matmul: (-1x49x1152xf32) <- (-1x49x384xf32, 384x1152xf32) + matmul_82 = paddle._C_ops.matmul(reshape_143, parameter_100, False, False) + del parameter_100 + + # pd_op.add: (-1x49x1152xf32) <- (-1x49x1152xf32, 1152xf32) + add_113 = paddle._C_ops.add(matmul_82, parameter_99) + del parameter_99 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_136 = [slice_152, full_29, full_30, full_57, full_31] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_134 = paddle._C_ops.stack(combine_136, 0) + del combine_136 + + # pd_op.reshape: (-1x49x3x12x32xf32) <- (-1x49x1152xf32, 5xi64) + reshape_310 = paddle._C_ops.reshape(add_113, stack_134) + del stack_134 + + # pd_op.transpose: (3x-1x12x49x32xf32) <- (-1x49x3x12x32xf32) + transpose_98 = paddle._C_ops.transpose(reshape_310, [2, 0, 3, 1, 4]) + del reshape_310 + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_153 = paddle._C_ops.slice( + transpose_98, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_154 = paddle._C_ops.slice( + transpose_98, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_16 = paddle._C_ops.slice( + transpose_98, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x12x49x32xf32) <- (-1x12x49x32xf32, 1xf32) + scale_16 = paddle._C_ops.scale(slice_153, full_0, float("0"), True) + del slice_153 + + # pd_op.transpose: (-1x12x32x49xf32) <- (-1x12x49x32xf32) + transpose_99 = paddle._C_ops.transpose(slice_154, [0, 1, 3, 2]) + del slice_154 + + # pd_op.matmul: (-1x12x49x49xf32) <- (-1x12x49x32xf32, -1x12x32x49xf32) + matmul_83 = paddle._C_ops.matmul(scale_16, transpose_99, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_144 = paddle._C_ops.reshape(data_33, full_int_array_12) + del data_33 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_16 = paddle._C_ops.index_select(data_34, reshape_144, 0) + del data_34 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_311 = paddle._C_ops.reshape(index_select_16, full_int_array_13) + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_100 = paddle._C_ops.transpose(reshape_311, [2, 0, 1]) + del reshape_311 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_24 = paddle._C_ops.unsqueeze(transpose_100, full_int_array_7) + + # pd_op.add: (-1x12x49x49xf32) <- (-1x12x49x49xf32, 1x12x49x49xf32) + add_208 = paddle._C_ops.add(matmul_83, unsqueeze_24) + + # pd_op.softmax: (-1x12x49x49xf32) <- (-1x12x49x49xf32) + softmax_16 = paddle._C_ops.softmax(add_208, -1) + del add_208 + + # pd_op.matmul: (-1x12x49x32xf32) <- (-1x12x49x49xf32, -1x12x49x32xf32) + matmul_140 = paddle._C_ops.matmul(softmax_16, slice_16, False, False) + + # pd_op.transpose: (-1x49x12x32xf32) <- (-1x12x49x32xf32) + transpose_101 = paddle._C_ops.transpose(matmul_140, [0, 2, 1, 3]) + del matmul_140 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_137 = [slice_152, full_29, full_43] + del slice_152 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_135 = paddle._C_ops.stack(combine_137, 0) + del combine_137 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x49x12x32xf32, 3xi64) + reshape_145 = paddle._C_ops.reshape(transpose_101, stack_135) + del stack_135 + + # pd_op.matmul: (-1x49x384xf32) <- (-1x49x384xf32, 384x384xf32) + matmul_84 = paddle._C_ops.matmul(reshape_145, parameter_98, False, False) + del parameter_98 + + # pd_op.add: (-1x49x384xf32) <- (-1x49x384xf32, 384xf32) + add_114 = paddle._C_ops.add(matmul_84, parameter_97) + del parameter_97 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x49x384xf32, 4xi64) + reshape_146 = paddle._C_ops.reshape(add_114, full_int_array_38) + + # pd_op.reshape: (-1x2x2x7x7x384xf32) <- (-1x7x7x384xf32, 6xi64) + reshape_312 = paddle._C_ops.reshape(reshape_146, full_int_array_40) + + # pd_op.transpose: (-1x2x7x2x7x384xf32) <- (-1x2x2x7x7x384xf32) + transpose_102 = paddle._C_ops.transpose(reshape_312, [0, 1, 3, 2, 4, 5]) + del reshape_312 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x2x7x2x7x384xf32, 4xi64) + reshape_147 = paddle._C_ops.reshape(transpose_102, full_int_array_41) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_138 = [slice_150, full_58, full_43] + del slice_150 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_136 = paddle._C_ops.stack(combine_138, 0) + del combine_138 + + # pd_op.reshape: (-1x196x384xf32) <- (-1x14x14x384xf32, 3xi64) + reshape_148 = paddle._C_ops.reshape(reshape_147, stack_136) + del stack_136 + + # pd_op.full: (xf32) <- () + full_17 = paddle._C_ops.full( + [], + float("0.791304"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_184 = full_17 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_97 = paddle._C_ops.shape64(reshape_148) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_155 = paddle._C_ops.slice( + shape64_97, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_97 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_139 = [slice_155, full_39, full_39] + del slice_155 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_137 = paddle._C_ops.stack(combine_139, 0) + del combine_139 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_30 = paddle._C_ops.uniform( + stack_137, + paddle.float32, + full_40, + full_41, + 0, + paddle.framework._current_expected_place(), + ) + del stack_137 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_209 = paddle._C_ops.add(full_17, uniform_30) + del uniform_30 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_30 = paddle._C_ops.floor(add_209) + del add_209 + + # pd_op.divide: (-1x196x384xf32) <- (-1x196x384xf32, xf32) + divide_30 = paddle._C_ops.divide(reshape_148, full_17) + + # pd_op.multiply: (-1x196x384xf32) <- (-1x196x384xf32, -1x1x1xf32) + multiply_30 = paddle._C_ops.multiply(divide_30, floor_30) + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_115 = paddle._C_ops.add(add_112, multiply_30) + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_108, layer_norm_109, layer_norm_110 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_115, parameter_96, parameter_95, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_95, parameter_96 + + # pd_op.matmul: (-1x196x1536xf32) <- (-1x196x384xf32, 384x1536xf32) + matmul_85 = paddle._C_ops.matmul(layer_norm_108, parameter_94, False, False) + del parameter_94 + + # pd_op.add: (-1x196x1536xf32) <- (-1x196x1536xf32, 1536xf32) + add_116 = paddle._C_ops.add(matmul_85, parameter_93) + del parameter_93 + + # pd_op.gelu: (-1x196x1536xf32) <- (-1x196x1536xf32) + gelu_16 = paddle._C_ops.gelu(add_116, False) + + # pd_op.matmul: (-1x196x384xf32) <- (-1x196x1536xf32, 1536x384xf32) + matmul_86 = paddle._C_ops.matmul(gelu_16, parameter_92, False, False) + del parameter_92 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, 384xf32) + add_117 = paddle._C_ops.add(matmul_86, parameter_91) + del parameter_91 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_98 = paddle._C_ops.shape64(add_117) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_156 = paddle._C_ops.slice( + shape64_98, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_98 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_140 = [slice_156, full_39, full_39] + del slice_156 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_138 = paddle._C_ops.stack(combine_140, 0) + del combine_140 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_31 = paddle._C_ops.uniform( + stack_138, + paddle.float32, + full_40, + full_41, + 0, + paddle.framework._current_expected_place(), + ) + del stack_138 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_210 = paddle._C_ops.add(full_17, uniform_31) + del uniform_31 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_31 = paddle._C_ops.floor(add_210) + del add_210 + + # pd_op.divide: (-1x196x384xf32) <- (-1x196x384xf32, xf32) + divide_31 = paddle._C_ops.divide(add_117, full_17) + + # pd_op.multiply: (-1x196x384xf32) <- (-1x196x384xf32, -1x1x1xf32) + multiply_31 = paddle._C_ops.multiply(divide_31, floor_31) + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_118 = paddle._C_ops.add(add_115, multiply_31) + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_99 = paddle._C_ops.shape64(add_118) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_157 = paddle._C_ops.slice( + shape64_99, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_99 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_111, layer_norm_112, layer_norm_113 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_118, parameter_90, parameter_89, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_89, parameter_90 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_141 = [slice_157, full_55, full_55, full_43] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_139 = paddle._C_ops.stack(combine_141, 0) + del combine_141 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x196x384xf32, 4xi64) + reshape_149 = paddle._C_ops.reshape(layer_norm_111, stack_139) + del stack_139 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_100 = paddle._C_ops.shape64(reshape_149) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_158 = paddle._C_ops.slice( + shape64_100, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_100 + + # pd_op.roll: (-1x14x14x384xf32) <- (-1x14x14x384xf32, 2xi64) + roll_16 = paddle._C_ops.roll(reshape_149, full_int_array_2, [1, 2]) + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_101 = paddle._C_ops.shape64(roll_16) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_159 = paddle._C_ops.slice( + shape64_101, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_101 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_142 = [slice_159, full_56, full_28, full_56, full_28, full_43] + del slice_159 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_140 = paddle._C_ops.stack(combine_142, 0) + del combine_142 + + # pd_op.reshape: (-1x2x7x2x7x384xf32) <- (-1x14x14x384xf32, 6xi64) + reshape_313 = paddle._C_ops.reshape(roll_16, stack_140) + del stack_140 + + # pd_op.transpose: (-1x2x2x7x7x384xf32) <- (-1x2x7x2x7x384xf32) + transpose_103 = paddle._C_ops.transpose(reshape_313, [0, 1, 3, 2, 4, 5]) + del reshape_313 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x2x2x7x7x384xf32, 4xi64) + reshape_150 = paddle._C_ops.reshape(transpose_103, full_int_array_38) + + # pd_op.reshape: (-1x49x384xf32) <- (-1x7x7x384xf32, 3xi64) + reshape_151 = paddle._C_ops.reshape(reshape_150, full_int_array_39) + + # pd_op.full: (1x14x14x1xf32) <- () + full_68 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__76 = paddle._C_ops.set_value_( + full_68, + full_int_array_16, + full_int_array_17, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_68 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__77 = paddle._C_ops.set_value_( + set_value__76, + full_int_array_19, + full_int_array_20, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__76 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__78 = paddle._C_ops.set_value_( + set_value__77, + full_int_array_21, + full_int_array_22, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__77 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__79 = paddle._C_ops.set_value_( + set_value__78, + full_int_array_23, + full_int_array_24, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__78 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__80 = paddle._C_ops.set_value_( + set_value__79, + full_int_array_17, + full_int_array_2, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__79 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__81 = paddle._C_ops.set_value_( + set_value__80, + full_int_array_20, + full_int_array_25, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__80 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__82 = paddle._C_ops.set_value_( + set_value__81, + full_int_array_26, + full_int_array_27, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__81 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__83 = paddle._C_ops.set_value_( + set_value__82, + full_int_array_24, + full_int_array_28, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__82 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__8 = paddle._C_ops.set_value_( + set_value__83, + full_int_array_2, + full_int_array_29, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__83 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_314 = paddle._C_ops.reshape(set_value__8, full_int_array_42) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_154 = paddle._C_ops.transpose(reshape_314, [0, 1, 3, 2, 4, 5]) + del reshape_314 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_315 = paddle._C_ops.reshape(transpose_154, full_int_array_31) + del transpose_154 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_316 = paddle._C_ops.reshape(reshape_315, full_int_array_32) + del reshape_315 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_61 = paddle._C_ops.unsqueeze(reshape_316, full_int_array_8) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_62 = paddle._C_ops.unsqueeze(reshape_316, full_int_array_0) + del reshape_316 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_8 = paddle._C_ops.subtract(unsqueeze_61, unsqueeze_62) + del unsqueeze_61, unsqueeze_62 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_8 = paddle._C_ops.not_equal(subtract_8, full_34) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_16 = paddle._C_ops.where(not_equal_8, full_60, subtract_8) + del not_equal_8, subtract_8 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_8 = paddle._C_ops.equal(where_16, full_34) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_17 = paddle._C_ops.where(equal_8, full_61, where_16) + del equal_8, where_16 + + # pd_op.shape64: (3xi64) <- (-1x49x384xf32) + shape64_102 = paddle._C_ops.shape64(reshape_151) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_160 = paddle._C_ops.slice( + shape64_102, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_102 + + # pd_op.matmul: (-1x49x1152xf32) <- (-1x49x384xf32, 384x1152xf32) + matmul_87 = paddle._C_ops.matmul(reshape_151, parameter_88, False, False) + del parameter_88 + + # pd_op.add: (-1x49x1152xf32) <- (-1x49x1152xf32, 1152xf32) + add_119 = paddle._C_ops.add(matmul_87, parameter_87) + del parameter_87 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_143 = [slice_160, full_29, full_30, full_57, full_31] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_141 = paddle._C_ops.stack(combine_143, 0) + del combine_143 + + # pd_op.reshape: (-1x49x3x12x32xf32) <- (-1x49x1152xf32, 5xi64) + reshape_317 = paddle._C_ops.reshape(add_119, stack_141) + del stack_141 + + # pd_op.transpose: (3x-1x12x49x32xf32) <- (-1x49x3x12x32xf32) + transpose_104 = paddle._C_ops.transpose(reshape_317, [2, 0, 3, 1, 4]) + del reshape_317 + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_161 = paddle._C_ops.slice( + transpose_104, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_162 = paddle._C_ops.slice( + transpose_104, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_17 = paddle._C_ops.slice( + transpose_104, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x12x49x32xf32) <- (-1x12x49x32xf32, 1xf32) + scale_17 = paddle._C_ops.scale(slice_161, full_0, float("0"), True) + del slice_161 + + # pd_op.transpose: (-1x12x32x49xf32) <- (-1x12x49x32xf32) + transpose_105 = paddle._C_ops.transpose(slice_162, [0, 1, 3, 2]) + del slice_162 + + # pd_op.matmul: (-1x12x49x49xf32) <- (-1x12x49x32xf32, -1x12x32x49xf32) + matmul_88 = paddle._C_ops.matmul(scale_17, transpose_105, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_152 = paddle._C_ops.reshape(data_35, full_int_array_12) + del data_35 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_17 = paddle._C_ops.index_select(data_36, reshape_152, 0) + del data_36 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_318 = paddle._C_ops.reshape(index_select_17, full_int_array_13) + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_106 = paddle._C_ops.transpose(reshape_318, [2, 0, 1]) + del reshape_318 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_25 = paddle._C_ops.unsqueeze(transpose_106, full_int_array_7) + + # pd_op.add: (-1x12x49x49xf32) <- (-1x12x49x49xf32, 1x12x49x49xf32) + add_120 = paddle._C_ops.add(matmul_88, unsqueeze_25) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_8 = paddle._C_ops.floor_divide(slice_160, full_62) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_144 = [floor_divide_8, full_46, full_57, full_29, full_29] + del floor_divide_8 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_142 = paddle._C_ops.stack(combine_144, 0) + del combine_144 + + # pd_op.reshape: (-1x4x12x49x49xf32) <- (-1x12x49x49xf32, 5xi64) + reshape_153 = paddle._C_ops.reshape(add_120, stack_142) + del stack_142 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_63 = paddle._C_ops.unsqueeze(where_17, full_int_array_8) + del where_17 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_26 = paddle._C_ops.unsqueeze(unsqueeze_63, full_int_array_7) + del unsqueeze_63 + + # pd_op.add: (-1x4x12x49x49xf32) <- (-1x4x12x49x49xf32, 1x4x1x49x49xf32) + add_121 = paddle._C_ops.add(reshape_153, unsqueeze_26) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_145 = [slice_160, full_57, full_29, full_29] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_143 = paddle._C_ops.stack(combine_145, 0) + del combine_145 + + # pd_op.reshape: (-1x12x49x49xf32) <- (-1x4x12x49x49xf32, 4xi64) + reshape_319 = paddle._C_ops.reshape(add_121, stack_143) + del stack_143 + + # pd_op.softmax: (-1x12x49x49xf32) <- (-1x12x49x49xf32) + softmax_17 = paddle._C_ops.softmax(reshape_319, -1) + del reshape_319 + + # pd_op.matmul: (-1x12x49x32xf32) <- (-1x12x49x49xf32, -1x12x49x32xf32) + matmul_141 = paddle._C_ops.matmul(softmax_17, slice_17, False, False) + + # pd_op.transpose: (-1x49x12x32xf32) <- (-1x12x49x32xf32) + transpose_107 = paddle._C_ops.transpose(matmul_141, [0, 2, 1, 3]) + del matmul_141 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_146 = [slice_160, full_29, full_43] + del slice_160 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_144 = paddle._C_ops.stack(combine_146, 0) + del combine_146 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x49x12x32xf32, 3xi64) + reshape_154 = paddle._C_ops.reshape(transpose_107, stack_144) + del stack_144 + + # pd_op.matmul: (-1x49x384xf32) <- (-1x49x384xf32, 384x384xf32) + matmul_89 = paddle._C_ops.matmul(reshape_154, parameter_86, False, False) + del parameter_86 + + # pd_op.add: (-1x49x384xf32) <- (-1x49x384xf32, 384xf32) + add_122 = paddle._C_ops.add(matmul_89, parameter_85) + del parameter_85 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x49x384xf32, 4xi64) + reshape_155 = paddle._C_ops.reshape(add_122, full_int_array_38) + + # pd_op.reshape: (-1x2x2x7x7x384xf32) <- (-1x7x7x384xf32, 6xi64) + reshape_320 = paddle._C_ops.reshape(reshape_155, full_int_array_40) + + # pd_op.transpose: (-1x2x7x2x7x384xf32) <- (-1x2x2x7x7x384xf32) + transpose_108 = paddle._C_ops.transpose(reshape_320, [0, 1, 3, 2, 4, 5]) + del reshape_320 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x2x7x2x7x384xf32, 4xi64) + reshape_156 = paddle._C_ops.reshape(transpose_108, full_int_array_41) + + # pd_op.roll: (-1x14x14x384xf32) <- (-1x14x14x384xf32, 2xi64) + roll_17 = paddle._C_ops.roll(reshape_156, full_int_array_3, [1, 2]) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_147 = [slice_157, full_58, full_43] + del slice_157 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_145 = paddle._C_ops.stack(combine_147, 0) + del combine_147 + + # pd_op.reshape: (-1x196x384xf32) <- (-1x14x14x384xf32, 3xi64) + reshape_157 = paddle._C_ops.reshape(roll_17, stack_145) + del stack_145 + + # pd_op.full: (xf32) <- () + full_18 = paddle._C_ops.full( + [], + float("0.778261"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_195 = full_18 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_103 = paddle._C_ops.shape64(reshape_157) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_163 = paddle._C_ops.slice( + shape64_103, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_103 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_148 = [slice_163, full_39, full_39] + del slice_163 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_146 = paddle._C_ops.stack(combine_148, 0) + del combine_148 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_32 = paddle._C_ops.uniform( + stack_146, + paddle.float32, + full_40, + full_41, + 0, + paddle.framework._current_expected_place(), + ) + del stack_146 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_211 = paddle._C_ops.add(full_18, uniform_32) + del uniform_32 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_32 = paddle._C_ops.floor(add_211) + del add_211 + + # pd_op.divide: (-1x196x384xf32) <- (-1x196x384xf32, xf32) + divide_32 = paddle._C_ops.divide(reshape_157, full_18) + + # pd_op.multiply: (-1x196x384xf32) <- (-1x196x384xf32, -1x1x1xf32) + multiply_32 = paddle._C_ops.multiply(divide_32, floor_32) + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_123 = paddle._C_ops.add(add_118, multiply_32) + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_114, layer_norm_115, layer_norm_116 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_123, parameter_84, parameter_83, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_83, parameter_84 + + # pd_op.matmul: (-1x196x1536xf32) <- (-1x196x384xf32, 384x1536xf32) + matmul_90 = paddle._C_ops.matmul(layer_norm_114, parameter_82, False, False) + del parameter_82 + + # pd_op.add: (-1x196x1536xf32) <- (-1x196x1536xf32, 1536xf32) + add_124 = paddle._C_ops.add(matmul_90, parameter_81) + del parameter_81 + + # pd_op.gelu: (-1x196x1536xf32) <- (-1x196x1536xf32) + gelu_17 = paddle._C_ops.gelu(add_124, False) + + # pd_op.matmul: (-1x196x384xf32) <- (-1x196x1536xf32, 1536x384xf32) + matmul_91 = paddle._C_ops.matmul(gelu_17, parameter_80, False, False) + del parameter_80 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, 384xf32) + add_125 = paddle._C_ops.add(matmul_91, parameter_79) + del parameter_79 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_104 = paddle._C_ops.shape64(add_125) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_164 = paddle._C_ops.slice( + shape64_104, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_104 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_149 = [slice_164, full_39, full_39] + del slice_164 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_147 = paddle._C_ops.stack(combine_149, 0) + del combine_149 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_33 = paddle._C_ops.uniform( + stack_147, + paddle.float32, + full_40, + full_41, + 0, + paddle.framework._current_expected_place(), + ) + del stack_147 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_212 = paddle._C_ops.add(full_18, uniform_33) + del uniform_33 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_33 = paddle._C_ops.floor(add_212) + del add_212 + + # pd_op.divide: (-1x196x384xf32) <- (-1x196x384xf32, xf32) + divide_33 = paddle._C_ops.divide(add_125, full_18) + + # pd_op.multiply: (-1x196x384xf32) <- (-1x196x384xf32, -1x1x1xf32) + multiply_33 = paddle._C_ops.multiply(divide_33, floor_33) + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_126 = paddle._C_ops.add(add_123, multiply_33) + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_105 = paddle._C_ops.shape64(add_126) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_165 = paddle._C_ops.slice( + shape64_105, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_105 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_117, layer_norm_118, layer_norm_119 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_126, parameter_78, parameter_77, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_77, parameter_78 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_150 = [slice_165, full_55, full_55, full_43] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_148 = paddle._C_ops.stack(combine_150, 0) + del combine_150 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x196x384xf32, 4xi64) + reshape_158 = paddle._C_ops.reshape(layer_norm_117, stack_148) + del stack_148 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_106 = paddle._C_ops.shape64(reshape_158) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_166 = paddle._C_ops.slice( + shape64_106, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_106 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_151 = [slice_166, full_56, full_28, full_56, full_28, full_43] + del slice_166 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_149 = paddle._C_ops.stack(combine_151, 0) + del combine_151 + + # pd_op.reshape: (-1x2x7x2x7x384xf32) <- (-1x14x14x384xf32, 6xi64) + reshape_321 = paddle._C_ops.reshape(reshape_158, stack_149) + del stack_149 + + # pd_op.transpose: (-1x2x2x7x7x384xf32) <- (-1x2x7x2x7x384xf32) + transpose_109 = paddle._C_ops.transpose(reshape_321, [0, 1, 3, 2, 4, 5]) + del reshape_321 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x2x2x7x7x384xf32, 4xi64) + reshape_159 = paddle._C_ops.reshape(transpose_109, full_int_array_38) + + # pd_op.reshape: (-1x49x384xf32) <- (-1x7x7x384xf32, 3xi64) + reshape_160 = paddle._C_ops.reshape(reshape_159, full_int_array_39) + + # pd_op.shape64: (3xi64) <- (-1x49x384xf32) + shape64_107 = paddle._C_ops.shape64(reshape_160) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_167 = paddle._C_ops.slice( + shape64_107, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_107 + + # pd_op.matmul: (-1x49x1152xf32) <- (-1x49x384xf32, 384x1152xf32) + matmul_92 = paddle._C_ops.matmul(reshape_160, parameter_76, False, False) + del parameter_76 + + # pd_op.add: (-1x49x1152xf32) <- (-1x49x1152xf32, 1152xf32) + add_127 = paddle._C_ops.add(matmul_92, parameter_75) + del parameter_75 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_152 = [slice_167, full_29, full_30, full_57, full_31] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_150 = paddle._C_ops.stack(combine_152, 0) + del combine_152 + + # pd_op.reshape: (-1x49x3x12x32xf32) <- (-1x49x1152xf32, 5xi64) + reshape_322 = paddle._C_ops.reshape(add_127, stack_150) + del stack_150 + + # pd_op.transpose: (3x-1x12x49x32xf32) <- (-1x49x3x12x32xf32) + transpose_110 = paddle._C_ops.transpose(reshape_322, [2, 0, 3, 1, 4]) + del reshape_322 + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_168 = paddle._C_ops.slice( + transpose_110, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_169 = paddle._C_ops.slice( + transpose_110, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_18 = paddle._C_ops.slice( + transpose_110, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x12x49x32xf32) <- (-1x12x49x32xf32, 1xf32) + scale_18 = paddle._C_ops.scale(slice_168, full_0, float("0"), True) + del slice_168 + + # pd_op.transpose: (-1x12x32x49xf32) <- (-1x12x49x32xf32) + transpose_111 = paddle._C_ops.transpose(slice_169, [0, 1, 3, 2]) + del slice_169 + + # pd_op.matmul: (-1x12x49x49xf32) <- (-1x12x49x32xf32, -1x12x32x49xf32) + matmul_93 = paddle._C_ops.matmul(scale_18, transpose_111, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_161 = paddle._C_ops.reshape(data_37, full_int_array_12) + del data_37 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_18 = paddle._C_ops.index_select(data_38, reshape_161, 0) + del data_38 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_323 = paddle._C_ops.reshape(index_select_18, full_int_array_13) + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_112 = paddle._C_ops.transpose(reshape_323, [2, 0, 1]) + del reshape_323 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_27 = paddle._C_ops.unsqueeze(transpose_112, full_int_array_7) + + # pd_op.add: (-1x12x49x49xf32) <- (-1x12x49x49xf32, 1x12x49x49xf32) + add_213 = paddle._C_ops.add(matmul_93, unsqueeze_27) + + # pd_op.softmax: (-1x12x49x49xf32) <- (-1x12x49x49xf32) + softmax_18 = paddle._C_ops.softmax(add_213, -1) + del add_213 + + # pd_op.matmul: (-1x12x49x32xf32) <- (-1x12x49x49xf32, -1x12x49x32xf32) + matmul_142 = paddle._C_ops.matmul(softmax_18, slice_18, False, False) + + # pd_op.transpose: (-1x49x12x32xf32) <- (-1x12x49x32xf32) + transpose_113 = paddle._C_ops.transpose(matmul_142, [0, 2, 1, 3]) + del matmul_142 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_153 = [slice_167, full_29, full_43] + del slice_167 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_151 = paddle._C_ops.stack(combine_153, 0) + del combine_153 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x49x12x32xf32, 3xi64) + reshape_162 = paddle._C_ops.reshape(transpose_113, stack_151) + del stack_151 + + # pd_op.matmul: (-1x49x384xf32) <- (-1x49x384xf32, 384x384xf32) + matmul_94 = paddle._C_ops.matmul(reshape_162, parameter_74, False, False) + del parameter_74 + + # pd_op.add: (-1x49x384xf32) <- (-1x49x384xf32, 384xf32) + add_128 = paddle._C_ops.add(matmul_94, parameter_73) + del parameter_73 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x49x384xf32, 4xi64) + reshape_163 = paddle._C_ops.reshape(add_128, full_int_array_38) + + # pd_op.reshape: (-1x2x2x7x7x384xf32) <- (-1x7x7x384xf32, 6xi64) + reshape_324 = paddle._C_ops.reshape(reshape_163, full_int_array_40) + + # pd_op.transpose: (-1x2x7x2x7x384xf32) <- (-1x2x2x7x7x384xf32) + transpose_114 = paddle._C_ops.transpose(reshape_324, [0, 1, 3, 2, 4, 5]) + del reshape_324 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x2x7x2x7x384xf32, 4xi64) + reshape_164 = paddle._C_ops.reshape(transpose_114, full_int_array_41) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_154 = [slice_165, full_58, full_43] + del slice_165 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_152 = paddle._C_ops.stack(combine_154, 0) + del combine_154 + + # pd_op.reshape: (-1x196x384xf32) <- (-1x14x14x384xf32, 3xi64) + reshape_165 = paddle._C_ops.reshape(reshape_164, stack_152) + del stack_152 + + # pd_op.full: (xf32) <- () + full_19 = paddle._C_ops.full( + [], + float("0.765217"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_204 = full_19 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_108 = paddle._C_ops.shape64(reshape_165) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_170 = paddle._C_ops.slice( + shape64_108, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_108 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_155 = [slice_170, full_39, full_39] + del slice_170 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_153 = paddle._C_ops.stack(combine_155, 0) + del combine_155 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_34 = paddle._C_ops.uniform( + stack_153, + paddle.float32, + full_40, + full_41, + 0, + paddle.framework._current_expected_place(), + ) + del stack_153 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_214 = paddle._C_ops.add(full_19, uniform_34) + del uniform_34 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_34 = paddle._C_ops.floor(add_214) + del add_214 + + # pd_op.divide: (-1x196x384xf32) <- (-1x196x384xf32, xf32) + divide_34 = paddle._C_ops.divide(reshape_165, full_19) + + # pd_op.multiply: (-1x196x384xf32) <- (-1x196x384xf32, -1x1x1xf32) + multiply_34 = paddle._C_ops.multiply(divide_34, floor_34) + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_129 = paddle._C_ops.add(add_126, multiply_34) + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_120, layer_norm_121, layer_norm_122 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_129, parameter_72, parameter_71, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_71, parameter_72 + + # pd_op.matmul: (-1x196x1536xf32) <- (-1x196x384xf32, 384x1536xf32) + matmul_95 = paddle._C_ops.matmul(layer_norm_120, parameter_70, False, False) + del parameter_70 + + # pd_op.add: (-1x196x1536xf32) <- (-1x196x1536xf32, 1536xf32) + add_130 = paddle._C_ops.add(matmul_95, parameter_69) + del parameter_69 + + # pd_op.gelu: (-1x196x1536xf32) <- (-1x196x1536xf32) + gelu_18 = paddle._C_ops.gelu(add_130, False) + + # pd_op.matmul: (-1x196x384xf32) <- (-1x196x1536xf32, 1536x384xf32) + matmul_96 = paddle._C_ops.matmul(gelu_18, parameter_68, False, False) + del parameter_68 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, 384xf32) + add_131 = paddle._C_ops.add(matmul_96, parameter_67) + del parameter_67 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_109 = paddle._C_ops.shape64(add_131) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_171 = paddle._C_ops.slice( + shape64_109, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_109 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_156 = [slice_171, full_39, full_39] + del slice_171 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_154 = paddle._C_ops.stack(combine_156, 0) + del combine_156 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_35 = paddle._C_ops.uniform( + stack_154, + paddle.float32, + full_40, + full_41, + 0, + paddle.framework._current_expected_place(), + ) + del stack_154 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_215 = paddle._C_ops.add(full_19, uniform_35) + del uniform_35 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_35 = paddle._C_ops.floor(add_215) + del add_215 + + # pd_op.divide: (-1x196x384xf32) <- (-1x196x384xf32, xf32) + divide_35 = paddle._C_ops.divide(add_131, full_19) + + # pd_op.multiply: (-1x196x384xf32) <- (-1x196x384xf32, -1x1x1xf32) + multiply_35 = paddle._C_ops.multiply(divide_35, floor_35) + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_132 = paddle._C_ops.add(add_129, multiply_35) + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_110 = paddle._C_ops.shape64(add_132) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_172 = paddle._C_ops.slice( + shape64_110, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_110 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_123, layer_norm_124, layer_norm_125 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_132, parameter_66, parameter_65, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_65, parameter_66 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_157 = [slice_172, full_55, full_55, full_43] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_155 = paddle._C_ops.stack(combine_157, 0) + del combine_157 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x196x384xf32, 4xi64) + reshape_166 = paddle._C_ops.reshape(layer_norm_123, stack_155) + del stack_155 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_111 = paddle._C_ops.shape64(reshape_166) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_173 = paddle._C_ops.slice( + shape64_111, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_111 + + # pd_op.roll: (-1x14x14x384xf32) <- (-1x14x14x384xf32, 2xi64) + roll_18 = paddle._C_ops.roll(reshape_166, full_int_array_2, [1, 2]) + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_112 = paddle._C_ops.shape64(roll_18) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_174 = paddle._C_ops.slice( + shape64_112, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_112 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_158 = [slice_174, full_56, full_28, full_56, full_28, full_43] + del slice_174 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_156 = paddle._C_ops.stack(combine_158, 0) + del combine_158 + + # pd_op.reshape: (-1x2x7x2x7x384xf32) <- (-1x14x14x384xf32, 6xi64) + reshape_325 = paddle._C_ops.reshape(roll_18, stack_156) + del stack_156 + + # pd_op.transpose: (-1x2x2x7x7x384xf32) <- (-1x2x7x2x7x384xf32) + transpose_115 = paddle._C_ops.transpose(reshape_325, [0, 1, 3, 2, 4, 5]) + del reshape_325 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x2x2x7x7x384xf32, 4xi64) + reshape_167 = paddle._C_ops.reshape(transpose_115, full_int_array_38) + + # pd_op.reshape: (-1x49x384xf32) <- (-1x7x7x384xf32, 3xi64) + reshape_168 = paddle._C_ops.reshape(reshape_167, full_int_array_39) + + # pd_op.full: (1x14x14x1xf32) <- () + full_69 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__84 = paddle._C_ops.set_value_( + full_69, + full_int_array_16, + full_int_array_17, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_69 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__85 = paddle._C_ops.set_value_( + set_value__84, + full_int_array_19, + full_int_array_20, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__84 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__86 = paddle._C_ops.set_value_( + set_value__85, + full_int_array_21, + full_int_array_22, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__85 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__87 = paddle._C_ops.set_value_( + set_value__86, + full_int_array_23, + full_int_array_24, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__86 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__88 = paddle._C_ops.set_value_( + set_value__87, + full_int_array_17, + full_int_array_2, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__87 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__89 = paddle._C_ops.set_value_( + set_value__88, + full_int_array_20, + full_int_array_25, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__88 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__90 = paddle._C_ops.set_value_( + set_value__89, + full_int_array_26, + full_int_array_27, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__89 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__91 = paddle._C_ops.set_value_( + set_value__90, + full_int_array_24, + full_int_array_28, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__90 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__9 = paddle._C_ops.set_value_( + set_value__91, + full_int_array_2, + full_int_array_29, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__91 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_326 = paddle._C_ops.reshape(set_value__9, full_int_array_42) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_155 = paddle._C_ops.transpose(reshape_326, [0, 1, 3, 2, 4, 5]) + del reshape_326 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_327 = paddle._C_ops.reshape(transpose_155, full_int_array_31) + del transpose_155 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_328 = paddle._C_ops.reshape(reshape_327, full_int_array_32) + del reshape_327 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_64 = paddle._C_ops.unsqueeze(reshape_328, full_int_array_8) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_65 = paddle._C_ops.unsqueeze(reshape_328, full_int_array_0) + del reshape_328 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_9 = paddle._C_ops.subtract(unsqueeze_64, unsqueeze_65) + del unsqueeze_64, unsqueeze_65 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_9 = paddle._C_ops.not_equal(subtract_9, full_34) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_18 = paddle._C_ops.where(not_equal_9, full_60, subtract_9) + del not_equal_9, subtract_9 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_9 = paddle._C_ops.equal(where_18, full_34) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_19 = paddle._C_ops.where(equal_9, full_61, where_18) + del equal_9, where_18 + + # pd_op.shape64: (3xi64) <- (-1x49x384xf32) + shape64_113 = paddle._C_ops.shape64(reshape_168) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_175 = paddle._C_ops.slice( + shape64_113, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_113 + + # pd_op.matmul: (-1x49x1152xf32) <- (-1x49x384xf32, 384x1152xf32) + matmul_97 = paddle._C_ops.matmul(reshape_168, parameter_64, False, False) + del parameter_64 + + # pd_op.add: (-1x49x1152xf32) <- (-1x49x1152xf32, 1152xf32) + add_133 = paddle._C_ops.add(matmul_97, parameter_63) + del parameter_63 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_159 = [slice_175, full_29, full_30, full_57, full_31] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_157 = paddle._C_ops.stack(combine_159, 0) + del combine_159 + + # pd_op.reshape: (-1x49x3x12x32xf32) <- (-1x49x1152xf32, 5xi64) + reshape_329 = paddle._C_ops.reshape(add_133, stack_157) + del stack_157 + + # pd_op.transpose: (3x-1x12x49x32xf32) <- (-1x49x3x12x32xf32) + transpose_116 = paddle._C_ops.transpose(reshape_329, [2, 0, 3, 1, 4]) + del reshape_329 + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_176 = paddle._C_ops.slice( + transpose_116, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_177 = paddle._C_ops.slice( + transpose_116, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_19 = paddle._C_ops.slice( + transpose_116, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x12x49x32xf32) <- (-1x12x49x32xf32, 1xf32) + scale_19 = paddle._C_ops.scale(slice_176, full_0, float("0"), True) + del slice_176 + + # pd_op.transpose: (-1x12x32x49xf32) <- (-1x12x49x32xf32) + transpose_117 = paddle._C_ops.transpose(slice_177, [0, 1, 3, 2]) + del slice_177 + + # pd_op.matmul: (-1x12x49x49xf32) <- (-1x12x49x32xf32, -1x12x32x49xf32) + matmul_98 = paddle._C_ops.matmul(scale_19, transpose_117, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_169 = paddle._C_ops.reshape(data_39, full_int_array_12) + del data_39 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_19 = paddle._C_ops.index_select(data_40, reshape_169, 0) + del data_40 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_330 = paddle._C_ops.reshape(index_select_19, full_int_array_13) + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_118 = paddle._C_ops.transpose(reshape_330, [2, 0, 1]) + del reshape_330 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_28 = paddle._C_ops.unsqueeze(transpose_118, full_int_array_7) + + # pd_op.add: (-1x12x49x49xf32) <- (-1x12x49x49xf32, 1x12x49x49xf32) + add_134 = paddle._C_ops.add(matmul_98, unsqueeze_28) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_9 = paddle._C_ops.floor_divide(slice_175, full_62) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_160 = [floor_divide_9, full_46, full_57, full_29, full_29] + del floor_divide_9 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_158 = paddle._C_ops.stack(combine_160, 0) + del combine_160 + + # pd_op.reshape: (-1x4x12x49x49xf32) <- (-1x12x49x49xf32, 5xi64) + reshape_170 = paddle._C_ops.reshape(add_134, stack_158) + del stack_158 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_66 = paddle._C_ops.unsqueeze(where_19, full_int_array_8) + del where_19 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_29 = paddle._C_ops.unsqueeze(unsqueeze_66, full_int_array_7) + del unsqueeze_66 + + # pd_op.add: (-1x4x12x49x49xf32) <- (-1x4x12x49x49xf32, 1x4x1x49x49xf32) + add_135 = paddle._C_ops.add(reshape_170, unsqueeze_29) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_161 = [slice_175, full_57, full_29, full_29] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_159 = paddle._C_ops.stack(combine_161, 0) + del combine_161 + + # pd_op.reshape: (-1x12x49x49xf32) <- (-1x4x12x49x49xf32, 4xi64) + reshape_331 = paddle._C_ops.reshape(add_135, stack_159) + del stack_159 + + # pd_op.softmax: (-1x12x49x49xf32) <- (-1x12x49x49xf32) + softmax_19 = paddle._C_ops.softmax(reshape_331, -1) + del reshape_331 + + # pd_op.matmul: (-1x12x49x32xf32) <- (-1x12x49x49xf32, -1x12x49x32xf32) + matmul_143 = paddle._C_ops.matmul(softmax_19, slice_19, False, False) + + # pd_op.transpose: (-1x49x12x32xf32) <- (-1x12x49x32xf32) + transpose_119 = paddle._C_ops.transpose(matmul_143, [0, 2, 1, 3]) + del matmul_143 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_162 = [slice_175, full_29, full_43] + del slice_175 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_160 = paddle._C_ops.stack(combine_162, 0) + del combine_162 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x49x12x32xf32, 3xi64) + reshape_171 = paddle._C_ops.reshape(transpose_119, stack_160) + del stack_160 + + # pd_op.matmul: (-1x49x384xf32) <- (-1x49x384xf32, 384x384xf32) + matmul_99 = paddle._C_ops.matmul(reshape_171, parameter_62, False, False) + del parameter_62 + + # pd_op.add: (-1x49x384xf32) <- (-1x49x384xf32, 384xf32) + add_136 = paddle._C_ops.add(matmul_99, parameter_61) + del parameter_61 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x49x384xf32, 4xi64) + reshape_172 = paddle._C_ops.reshape(add_136, full_int_array_38) + + # pd_op.reshape: (-1x2x2x7x7x384xf32) <- (-1x7x7x384xf32, 6xi64) + reshape_332 = paddle._C_ops.reshape(reshape_172, full_int_array_40) + + # pd_op.transpose: (-1x2x7x2x7x384xf32) <- (-1x2x2x7x7x384xf32) + transpose_120 = paddle._C_ops.transpose(reshape_332, [0, 1, 3, 2, 4, 5]) + del reshape_332 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x2x7x2x7x384xf32, 4xi64) + reshape_173 = paddle._C_ops.reshape(transpose_120, full_int_array_41) + + # pd_op.roll: (-1x14x14x384xf32) <- (-1x14x14x384xf32, 2xi64) + roll_19 = paddle._C_ops.roll(reshape_173, full_int_array_3, [1, 2]) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_163 = [slice_172, full_58, full_43] + del slice_172 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_161 = paddle._C_ops.stack(combine_163, 0) + del combine_163 + + # pd_op.reshape: (-1x196x384xf32) <- (-1x14x14x384xf32, 3xi64) + reshape_174 = paddle._C_ops.reshape(roll_19, stack_161) + del stack_161 + + # pd_op.full: (xf32) <- () + full_20 = paddle._C_ops.full( + [], + float("0.752174"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_215 = full_20 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_114 = paddle._C_ops.shape64(reshape_174) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_178 = paddle._C_ops.slice( + shape64_114, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_114 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_164 = [slice_178, full_39, full_39] + del slice_178 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_162 = paddle._C_ops.stack(combine_164, 0) + del combine_164 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_36 = paddle._C_ops.uniform( + stack_162, + paddle.float32, + full_40, + full_41, + 0, + paddle.framework._current_expected_place(), + ) + del stack_162 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_216 = paddle._C_ops.add(full_20, uniform_36) + del uniform_36 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_36 = paddle._C_ops.floor(add_216) + del add_216 + + # pd_op.divide: (-1x196x384xf32) <- (-1x196x384xf32, xf32) + divide_36 = paddle._C_ops.divide(reshape_174, full_20) + + # pd_op.multiply: (-1x196x384xf32) <- (-1x196x384xf32, -1x1x1xf32) + multiply_36 = paddle._C_ops.multiply(divide_36, floor_36) + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_137 = paddle._C_ops.add(add_132, multiply_36) + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_126, layer_norm_127, layer_norm_128 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_137, parameter_60, parameter_59, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_59, parameter_60 + + # pd_op.matmul: (-1x196x1536xf32) <- (-1x196x384xf32, 384x1536xf32) + matmul_100 = paddle._C_ops.matmul(layer_norm_126, parameter_58, False, False) + del parameter_58 + + # pd_op.add: (-1x196x1536xf32) <- (-1x196x1536xf32, 1536xf32) + add_138 = paddle._C_ops.add(matmul_100, parameter_57) + del parameter_57 + + # pd_op.gelu: (-1x196x1536xf32) <- (-1x196x1536xf32) + gelu_19 = paddle._C_ops.gelu(add_138, False) + + # pd_op.matmul: (-1x196x384xf32) <- (-1x196x1536xf32, 1536x384xf32) + matmul_101 = paddle._C_ops.matmul(gelu_19, parameter_56, False, False) + del parameter_56 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, 384xf32) + add_139 = paddle._C_ops.add(matmul_101, parameter_55) + del parameter_55 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_115 = paddle._C_ops.shape64(add_139) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_179 = paddle._C_ops.slice( + shape64_115, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_115 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_165 = [slice_179, full_39, full_39] + del slice_179 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_163 = paddle._C_ops.stack(combine_165, 0) + del combine_165 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_37 = paddle._C_ops.uniform( + stack_163, + paddle.float32, + full_40, + full_41, + 0, + paddle.framework._current_expected_place(), + ) + del stack_163 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_217 = paddle._C_ops.add(full_20, uniform_37) + del uniform_37 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_37 = paddle._C_ops.floor(add_217) + del add_217 + + # pd_op.divide: (-1x196x384xf32) <- (-1x196x384xf32, xf32) + divide_37 = paddle._C_ops.divide(add_139, full_20) + + # pd_op.multiply: (-1x196x384xf32) <- (-1x196x384xf32, -1x1x1xf32) + multiply_37 = paddle._C_ops.multiply(divide_37, floor_37) + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_140 = paddle._C_ops.add(add_137, multiply_37) + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_116 = paddle._C_ops.shape64(add_140) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_180 = paddle._C_ops.slice( + shape64_116, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_116 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_129, layer_norm_130, layer_norm_131 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_140, parameter_54, parameter_53, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_53, parameter_54 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_166 = [slice_180, full_55, full_55, full_43] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_164 = paddle._C_ops.stack(combine_166, 0) + del combine_166 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x196x384xf32, 4xi64) + reshape_175 = paddle._C_ops.reshape(layer_norm_129, stack_164) + del stack_164 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_117 = paddle._C_ops.shape64(reshape_175) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_181 = paddle._C_ops.slice( + shape64_117, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_117 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_167 = [slice_181, full_56, full_28, full_56, full_28, full_43] + del slice_181 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_165 = paddle._C_ops.stack(combine_167, 0) + del combine_167 + + # pd_op.reshape: (-1x2x7x2x7x384xf32) <- (-1x14x14x384xf32, 6xi64) + reshape_333 = paddle._C_ops.reshape(reshape_175, stack_165) + del stack_165 + + # pd_op.transpose: (-1x2x2x7x7x384xf32) <- (-1x2x7x2x7x384xf32) + transpose_121 = paddle._C_ops.transpose(reshape_333, [0, 1, 3, 2, 4, 5]) + del reshape_333 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x2x2x7x7x384xf32, 4xi64) + reshape_176 = paddle._C_ops.reshape(transpose_121, full_int_array_38) + + # pd_op.reshape: (-1x49x384xf32) <- (-1x7x7x384xf32, 3xi64) + reshape_177 = paddle._C_ops.reshape(reshape_176, full_int_array_39) + + # pd_op.shape64: (3xi64) <- (-1x49x384xf32) + shape64_118 = paddle._C_ops.shape64(reshape_177) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_182 = paddle._C_ops.slice( + shape64_118, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_118 + + # pd_op.matmul: (-1x49x1152xf32) <- (-1x49x384xf32, 384x1152xf32) + matmul_102 = paddle._C_ops.matmul(reshape_177, parameter_52, False, False) + del parameter_52 + + # pd_op.add: (-1x49x1152xf32) <- (-1x49x1152xf32, 1152xf32) + add_141 = paddle._C_ops.add(matmul_102, parameter_51) + del parameter_51 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_168 = [slice_182, full_29, full_30, full_57, full_31] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_166 = paddle._C_ops.stack(combine_168, 0) + del combine_168 + + # pd_op.reshape: (-1x49x3x12x32xf32) <- (-1x49x1152xf32, 5xi64) + reshape_334 = paddle._C_ops.reshape(add_141, stack_166) + del stack_166 + + # pd_op.transpose: (3x-1x12x49x32xf32) <- (-1x49x3x12x32xf32) + transpose_122 = paddle._C_ops.transpose(reshape_334, [2, 0, 3, 1, 4]) + del reshape_334 + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_183 = paddle._C_ops.slice( + transpose_122, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_184 = paddle._C_ops.slice( + transpose_122, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_20 = paddle._C_ops.slice( + transpose_122, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x12x49x32xf32) <- (-1x12x49x32xf32, 1xf32) + scale_20 = paddle._C_ops.scale(slice_183, full_0, float("0"), True) + del slice_183 + + # pd_op.transpose: (-1x12x32x49xf32) <- (-1x12x49x32xf32) + transpose_123 = paddle._C_ops.transpose(slice_184, [0, 1, 3, 2]) + del slice_184 + + # pd_op.matmul: (-1x12x49x49xf32) <- (-1x12x49x32xf32, -1x12x32x49xf32) + matmul_103 = paddle._C_ops.matmul(scale_20, transpose_123, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_178 = paddle._C_ops.reshape(data_41, full_int_array_12) + del data_41 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_20 = paddle._C_ops.index_select(data_42, reshape_178, 0) + del data_42 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_335 = paddle._C_ops.reshape(index_select_20, full_int_array_13) + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_124 = paddle._C_ops.transpose(reshape_335, [2, 0, 1]) + del reshape_335 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_30 = paddle._C_ops.unsqueeze(transpose_124, full_int_array_7) + + # pd_op.add: (-1x12x49x49xf32) <- (-1x12x49x49xf32, 1x12x49x49xf32) + add_218 = paddle._C_ops.add(matmul_103, unsqueeze_30) + + # pd_op.softmax: (-1x12x49x49xf32) <- (-1x12x49x49xf32) + softmax_20 = paddle._C_ops.softmax(add_218, -1) + del add_218 + + # pd_op.matmul: (-1x12x49x32xf32) <- (-1x12x49x49xf32, -1x12x49x32xf32) + matmul_144 = paddle._C_ops.matmul(softmax_20, slice_20, False, False) + + # pd_op.transpose: (-1x49x12x32xf32) <- (-1x12x49x32xf32) + transpose_125 = paddle._C_ops.transpose(matmul_144, [0, 2, 1, 3]) + del matmul_144 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_169 = [slice_182, full_29, full_43] + del slice_182 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_167 = paddle._C_ops.stack(combine_169, 0) + del combine_169 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x49x12x32xf32, 3xi64) + reshape_179 = paddle._C_ops.reshape(transpose_125, stack_167) + del stack_167 + + # pd_op.matmul: (-1x49x384xf32) <- (-1x49x384xf32, 384x384xf32) + matmul_104 = paddle._C_ops.matmul(reshape_179, parameter_50, False, False) + del parameter_50 + + # pd_op.add: (-1x49x384xf32) <- (-1x49x384xf32, 384xf32) + add_142 = paddle._C_ops.add(matmul_104, parameter_49) + del parameter_49 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x49x384xf32, 4xi64) + reshape_180 = paddle._C_ops.reshape(add_142, full_int_array_38) + + # pd_op.reshape: (-1x2x2x7x7x384xf32) <- (-1x7x7x384xf32, 6xi64) + reshape_336 = paddle._C_ops.reshape(reshape_180, full_int_array_40) + + # pd_op.transpose: (-1x2x7x2x7x384xf32) <- (-1x2x2x7x7x384xf32) + transpose_126 = paddle._C_ops.transpose(reshape_336, [0, 1, 3, 2, 4, 5]) + del reshape_336 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x2x7x2x7x384xf32, 4xi64) + reshape_181 = paddle._C_ops.reshape(transpose_126, full_int_array_41) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_170 = [slice_180, full_58, full_43] + del slice_180 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_168 = paddle._C_ops.stack(combine_170, 0) + del combine_170 + + # pd_op.reshape: (-1x196x384xf32) <- (-1x14x14x384xf32, 3xi64) + reshape_182 = paddle._C_ops.reshape(reshape_181, stack_168) + del stack_168 + + # pd_op.full: (xf32) <- () + full_21 = paddle._C_ops.full( + [], + float("0.73913"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_224 = full_21 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_119 = paddle._C_ops.shape64(reshape_182) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_185 = paddle._C_ops.slice( + shape64_119, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_119 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_171 = [slice_185, full_39, full_39] + del slice_185 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_169 = paddle._C_ops.stack(combine_171, 0) + del combine_171 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_38 = paddle._C_ops.uniform( + stack_169, + paddle.float32, + full_40, + full_41, + 0, + paddle.framework._current_expected_place(), + ) + del stack_169 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_219 = paddle._C_ops.add(full_21, uniform_38) + del uniform_38 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_38 = paddle._C_ops.floor(add_219) + del add_219 + + # pd_op.divide: (-1x196x384xf32) <- (-1x196x384xf32, xf32) + divide_38 = paddle._C_ops.divide(reshape_182, full_21) + + # pd_op.multiply: (-1x196x384xf32) <- (-1x196x384xf32, -1x1x1xf32) + multiply_38 = paddle._C_ops.multiply(divide_38, floor_38) + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_143 = paddle._C_ops.add(add_140, multiply_38) + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_132, layer_norm_133, layer_norm_134 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_143, parameter_48, parameter_47, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_47, parameter_48 + + # pd_op.matmul: (-1x196x1536xf32) <- (-1x196x384xf32, 384x1536xf32) + matmul_105 = paddle._C_ops.matmul(layer_norm_132, parameter_46, False, False) + del parameter_46 + + # pd_op.add: (-1x196x1536xf32) <- (-1x196x1536xf32, 1536xf32) + add_144 = paddle._C_ops.add(matmul_105, parameter_45) + del parameter_45 + + # pd_op.gelu: (-1x196x1536xf32) <- (-1x196x1536xf32) + gelu_20 = paddle._C_ops.gelu(add_144, False) + + # pd_op.matmul: (-1x196x384xf32) <- (-1x196x1536xf32, 1536x384xf32) + matmul_106 = paddle._C_ops.matmul(gelu_20, parameter_44, False, False) + del parameter_44 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, 384xf32) + add_145 = paddle._C_ops.add(matmul_106, parameter_43) + del parameter_43 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_120 = paddle._C_ops.shape64(add_145) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_186 = paddle._C_ops.slice( + shape64_120, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_120 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_172 = [slice_186, full_39, full_39] + del slice_186 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_170 = paddle._C_ops.stack(combine_172, 0) + del combine_172 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_39 = paddle._C_ops.uniform( + stack_170, + paddle.float32, + full_40, + full_41, + 0, + paddle.framework._current_expected_place(), + ) + del stack_170 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_220 = paddle._C_ops.add(full_21, uniform_39) + del uniform_39 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_39 = paddle._C_ops.floor(add_220) + del add_220 + + # pd_op.divide: (-1x196x384xf32) <- (-1x196x384xf32, xf32) + divide_39 = paddle._C_ops.divide(add_145, full_21) + + # pd_op.multiply: (-1x196x384xf32) <- (-1x196x384xf32, -1x1x1xf32) + multiply_39 = paddle._C_ops.multiply(divide_39, floor_39) + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_146 = paddle._C_ops.add(add_143, multiply_39) + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_121 = paddle._C_ops.shape64(add_146) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_187 = paddle._C_ops.slice( + shape64_121, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_121 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_135, layer_norm_136, layer_norm_137 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_146, parameter_42, parameter_41, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_41, parameter_42 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_173 = [slice_187, full_55, full_55, full_43] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_171 = paddle._C_ops.stack(combine_173, 0) + del combine_173 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x196x384xf32, 4xi64) + reshape_183 = paddle._C_ops.reshape(layer_norm_135, stack_171) + del stack_171 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_122 = paddle._C_ops.shape64(reshape_183) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_188 = paddle._C_ops.slice( + shape64_122, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_122 + + # pd_op.roll: (-1x14x14x384xf32) <- (-1x14x14x384xf32, 2xi64) + roll_20 = paddle._C_ops.roll(reshape_183, full_int_array_2, [1, 2]) + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_123 = paddle._C_ops.shape64(roll_20) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_189 = paddle._C_ops.slice( + shape64_123, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_123 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_174 = [slice_189, full_56, full_28, full_56, full_28, full_43] + del full_56, slice_189 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_172 = paddle._C_ops.stack(combine_174, 0) + del combine_174 + + # pd_op.reshape: (-1x2x7x2x7x384xf32) <- (-1x14x14x384xf32, 6xi64) + reshape_337 = paddle._C_ops.reshape(roll_20, stack_172) + del stack_172 + + # pd_op.transpose: (-1x2x2x7x7x384xf32) <- (-1x2x7x2x7x384xf32) + transpose_127 = paddle._C_ops.transpose(reshape_337, [0, 1, 3, 2, 4, 5]) + del reshape_337 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x2x2x7x7x384xf32, 4xi64) + reshape_184 = paddle._C_ops.reshape(transpose_127, full_int_array_38) + + # pd_op.reshape: (-1x49x384xf32) <- (-1x7x7x384xf32, 3xi64) + reshape_185 = paddle._C_ops.reshape(reshape_184, full_int_array_39) + del full_int_array_39 + + # pd_op.full: (1x14x14x1xf32) <- () + full_70 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__92 = paddle._C_ops.set_value_( + full_70, + full_int_array_16, + full_int_array_17, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_70 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__93 = paddle._C_ops.set_value_( + set_value__92, + full_int_array_19, + full_int_array_20, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__92 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__94 = paddle._C_ops.set_value_( + set_value__93, + full_int_array_21, + full_int_array_22, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__93 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__95 = paddle._C_ops.set_value_( + set_value__94, + full_int_array_23, + full_int_array_24, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__94 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__96 = paddle._C_ops.set_value_( + set_value__95, + full_int_array_17, + full_int_array_2, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__95 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__97 = paddle._C_ops.set_value_( + set_value__96, + full_int_array_20, + full_int_array_25, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__96 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__98 = paddle._C_ops.set_value_( + set_value__97, + full_int_array_26, + full_int_array_27, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__97 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__99 = paddle._C_ops.set_value_( + set_value__98, + full_int_array_24, + full_int_array_28, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__98 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__10 = paddle._C_ops.set_value_( + set_value__99, + full_int_array_2, + full_int_array_29, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__99 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_338 = paddle._C_ops.reshape(set_value__10, full_int_array_42) + del full_int_array_42 + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_156 = paddle._C_ops.transpose(reshape_338, [0, 1, 3, 2, 4, 5]) + del reshape_338 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_339 = paddle._C_ops.reshape(transpose_156, full_int_array_31) + del transpose_156 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_340 = paddle._C_ops.reshape(reshape_339, full_int_array_32) + del reshape_339 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_67 = paddle._C_ops.unsqueeze(reshape_340, full_int_array_8) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_68 = paddle._C_ops.unsqueeze(reshape_340, full_int_array_0) + del reshape_340 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_10 = paddle._C_ops.subtract(unsqueeze_67, unsqueeze_68) + del unsqueeze_67, unsqueeze_68 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_10 = paddle._C_ops.not_equal(subtract_10, full_34) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_20 = paddle._C_ops.where(not_equal_10, full_60, subtract_10) + del full_60, not_equal_10, subtract_10 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_10 = paddle._C_ops.equal(where_20, full_34) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_21 = paddle._C_ops.where(equal_10, full_61, where_20) + del equal_10, full_61, where_20 + + # pd_op.shape64: (3xi64) <- (-1x49x384xf32) + shape64_124 = paddle._C_ops.shape64(reshape_185) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_190 = paddle._C_ops.slice( + shape64_124, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_124 + + # pd_op.matmul: (-1x49x1152xf32) <- (-1x49x384xf32, 384x1152xf32) + matmul_107 = paddle._C_ops.matmul(reshape_185, parameter_40, False, False) + del parameter_40 + + # pd_op.add: (-1x49x1152xf32) <- (-1x49x1152xf32, 1152xf32) + add_147 = paddle._C_ops.add(matmul_107, parameter_39) + del parameter_39 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_175 = [slice_190, full_29, full_30, full_57, full_31] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_173 = paddle._C_ops.stack(combine_175, 0) + del combine_175 + + # pd_op.reshape: (-1x49x3x12x32xf32) <- (-1x49x1152xf32, 5xi64) + reshape_341 = paddle._C_ops.reshape(add_147, stack_173) + del stack_173 + + # pd_op.transpose: (3x-1x12x49x32xf32) <- (-1x49x3x12x32xf32) + transpose_128 = paddle._C_ops.transpose(reshape_341, [2, 0, 3, 1, 4]) + del reshape_341 + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_191 = paddle._C_ops.slice( + transpose_128, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_192 = paddle._C_ops.slice( + transpose_128, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_21 = paddle._C_ops.slice( + transpose_128, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x12x49x32xf32) <- (-1x12x49x32xf32, 1xf32) + scale_21 = paddle._C_ops.scale(slice_191, full_0, float("0"), True) + del slice_191 + + # pd_op.transpose: (-1x12x32x49xf32) <- (-1x12x49x32xf32) + transpose_129 = paddle._C_ops.transpose(slice_192, [0, 1, 3, 2]) + del slice_192 + + # pd_op.matmul: (-1x12x49x49xf32) <- (-1x12x49x32xf32, -1x12x32x49xf32) + matmul_108 = paddle._C_ops.matmul(scale_21, transpose_129, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_186 = paddle._C_ops.reshape(data_43, full_int_array_12) + del data_43 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_21 = paddle._C_ops.index_select(data_44, reshape_186, 0) + del data_44 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_342 = paddle._C_ops.reshape(index_select_21, full_int_array_13) + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_130 = paddle._C_ops.transpose(reshape_342, [2, 0, 1]) + del reshape_342 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_31 = paddle._C_ops.unsqueeze(transpose_130, full_int_array_7) + + # pd_op.add: (-1x12x49x49xf32) <- (-1x12x49x49xf32, 1x12x49x49xf32) + add_148 = paddle._C_ops.add(matmul_108, unsqueeze_31) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_10 = paddle._C_ops.floor_divide(slice_190, full_62) + del full_62 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_176 = [floor_divide_10, full_46, full_57, full_29, full_29] + del floor_divide_10, full_46 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_174 = paddle._C_ops.stack(combine_176, 0) + del combine_176 + + # pd_op.reshape: (-1x4x12x49x49xf32) <- (-1x12x49x49xf32, 5xi64) + reshape_187 = paddle._C_ops.reshape(add_148, stack_174) + del stack_174 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_69 = paddle._C_ops.unsqueeze(where_21, full_int_array_8) + del where_21 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_32 = paddle._C_ops.unsqueeze(unsqueeze_69, full_int_array_7) + del unsqueeze_69 + + # pd_op.add: (-1x4x12x49x49xf32) <- (-1x4x12x49x49xf32, 1x4x1x49x49xf32) + add_149 = paddle._C_ops.add(reshape_187, unsqueeze_32) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_177 = [slice_190, full_57, full_29, full_29] + del full_57 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_175 = paddle._C_ops.stack(combine_177, 0) + del combine_177 + + # pd_op.reshape: (-1x12x49x49xf32) <- (-1x4x12x49x49xf32, 4xi64) + reshape_343 = paddle._C_ops.reshape(add_149, stack_175) + del stack_175 + + # pd_op.softmax: (-1x12x49x49xf32) <- (-1x12x49x49xf32) + softmax_21 = paddle._C_ops.softmax(reshape_343, -1) + del reshape_343 + + # pd_op.matmul: (-1x12x49x32xf32) <- (-1x12x49x49xf32, -1x12x49x32xf32) + matmul_145 = paddle._C_ops.matmul(softmax_21, slice_21, False, False) + + # pd_op.transpose: (-1x49x12x32xf32) <- (-1x12x49x32xf32) + transpose_131 = paddle._C_ops.transpose(matmul_145, [0, 2, 1, 3]) + del matmul_145 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_178 = [slice_190, full_29, full_43] + del slice_190 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_176 = paddle._C_ops.stack(combine_178, 0) + del combine_178 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x49x12x32xf32, 3xi64) + reshape_188 = paddle._C_ops.reshape(transpose_131, stack_176) + del stack_176 + + # pd_op.matmul: (-1x49x384xf32) <- (-1x49x384xf32, 384x384xf32) + matmul_109 = paddle._C_ops.matmul(reshape_188, parameter_38, False, False) + del parameter_38 + + # pd_op.add: (-1x49x384xf32) <- (-1x49x384xf32, 384xf32) + add_150 = paddle._C_ops.add(matmul_109, parameter_37) + del parameter_37 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x49x384xf32, 4xi64) + reshape_189 = paddle._C_ops.reshape(add_150, full_int_array_38) + del full_int_array_38 + + # pd_op.reshape: (-1x2x2x7x7x384xf32) <- (-1x7x7x384xf32, 6xi64) + reshape_344 = paddle._C_ops.reshape(reshape_189, full_int_array_40) + del full_int_array_40 + + # pd_op.transpose: (-1x2x7x2x7x384xf32) <- (-1x2x2x7x7x384xf32) + transpose_132 = paddle._C_ops.transpose(reshape_344, [0, 1, 3, 2, 4, 5]) + del reshape_344 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x2x7x2x7x384xf32, 4xi64) + reshape_190 = paddle._C_ops.reshape(transpose_132, full_int_array_41) + del full_int_array_41 + + # pd_op.roll: (-1x14x14x384xf32) <- (-1x14x14x384xf32, 2xi64) + roll_21 = paddle._C_ops.roll(reshape_190, full_int_array_3, [1, 2]) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_179 = [slice_187, full_58, full_43] + del full_58, slice_187 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_177 = paddle._C_ops.stack(combine_179, 0) + del combine_179 + + # pd_op.reshape: (-1x196x384xf32) <- (-1x14x14x384xf32, 3xi64) + reshape_191 = paddle._C_ops.reshape(roll_21, stack_177) + del stack_177 + + # pd_op.full: (xf32) <- () + full_22 = paddle._C_ops.full( + [], + float("0.726087"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_235 = full_22 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_125 = paddle._C_ops.shape64(reshape_191) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_193 = paddle._C_ops.slice( + shape64_125, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_125 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_180 = [slice_193, full_39, full_39] + del slice_193 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_178 = paddle._C_ops.stack(combine_180, 0) + del combine_180 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_40 = paddle._C_ops.uniform( + stack_178, + paddle.float32, + full_40, + full_41, + 0, + paddle.framework._current_expected_place(), + ) + del stack_178 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_221 = paddle._C_ops.add(full_22, uniform_40) + del uniform_40 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_40 = paddle._C_ops.floor(add_221) + del add_221 + + # pd_op.divide: (-1x196x384xf32) <- (-1x196x384xf32, xf32) + divide_40 = paddle._C_ops.divide(reshape_191, full_22) + + # pd_op.multiply: (-1x196x384xf32) <- (-1x196x384xf32, -1x1x1xf32) + multiply_40 = paddle._C_ops.multiply(divide_40, floor_40) + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_151 = paddle._C_ops.add(add_146, multiply_40) + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_138, layer_norm_139, layer_norm_140 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_151, parameter_36, parameter_35, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_35, parameter_36 + + # pd_op.matmul: (-1x196x1536xf32) <- (-1x196x384xf32, 384x1536xf32) + matmul_110 = paddle._C_ops.matmul(layer_norm_138, parameter_34, False, False) + del parameter_34 + + # pd_op.add: (-1x196x1536xf32) <- (-1x196x1536xf32, 1536xf32) + add_152 = paddle._C_ops.add(matmul_110, parameter_33) + del parameter_33 + + # pd_op.gelu: (-1x196x1536xf32) <- (-1x196x1536xf32) + gelu_21 = paddle._C_ops.gelu(add_152, False) + + # pd_op.matmul: (-1x196x384xf32) <- (-1x196x1536xf32, 1536x384xf32) + matmul_111 = paddle._C_ops.matmul(gelu_21, parameter_32, False, False) + del parameter_32 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, 384xf32) + add_153 = paddle._C_ops.add(matmul_111, parameter_31) + del parameter_31 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_126 = paddle._C_ops.shape64(add_153) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_194 = paddle._C_ops.slice( + shape64_126, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_126 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_181 = [slice_194, full_39, full_39] + del slice_194 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_179 = paddle._C_ops.stack(combine_181, 0) + del combine_181 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_41 = paddle._C_ops.uniform( + stack_179, + paddle.float32, + full_40, + full_41, + 0, + paddle.framework._current_expected_place(), + ) + del stack_179 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_222 = paddle._C_ops.add(full_22, uniform_41) + del uniform_41 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_41 = paddle._C_ops.floor(add_222) + del add_222 + + # pd_op.divide: (-1x196x384xf32) <- (-1x196x384xf32, xf32) + divide_41 = paddle._C_ops.divide(add_153, full_22) + + # pd_op.multiply: (-1x196x384xf32) <- (-1x196x384xf32, -1x1x1xf32) + multiply_41 = paddle._C_ops.multiply(divide_41, floor_41) + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_154 = paddle._C_ops.add(add_151, multiply_41) + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_127 = paddle._C_ops.shape64(add_154) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_195 = paddle._C_ops.slice( + shape64_127, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_127 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_182 = [slice_195, full_55, full_55, full_43] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_180 = paddle._C_ops.stack(combine_182, 0) + del combine_182 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x196x384xf32, 4xi64) + reshape_192 = paddle._C_ops.reshape(add_154, stack_180) + del stack_180 + + # pd_op.strided_slice: (-1x7x7x384xf32) <- (-1x14x14x384xf32, 2xi64, 2xi64, 2xi64) + strided_slice_8 = paddle._C_ops.strided_slice( + reshape_192, [1, 2], full_int_array_16, full_int_array_29, full_int_array_4 + ) + + # pd_op.strided_slice: (-1x7x7x384xf32) <- (-1x14x14x384xf32, 2xi64, 2xi64, 2xi64) + strided_slice_9 = paddle._C_ops.strided_slice( + reshape_192, [1, 2], full_int_array_5, full_int_array_29, full_int_array_4 + ) + + # pd_op.strided_slice: (-1x7x7x384xf32) <- (-1x14x14x384xf32, 2xi64, 2xi64, 2xi64) + strided_slice_10 = paddle._C_ops.strided_slice( + reshape_192, [1, 2], full_int_array_6, full_int_array_29, full_int_array_4 + ) + + # pd_op.strided_slice: (-1x7x7x384xf32) <- (-1x14x14x384xf32, 2xi64, 2xi64, 2xi64) + strided_slice_11 = paddle._C_ops.strided_slice( + reshape_192, [1, 2], full_int_array_18, full_int_array_29, full_int_array_4 + ) + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_128 = paddle._C_ops.shape64(reshape_192) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_196 = paddle._C_ops.slice( + shape64_128, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_128 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_183 = [slice_196, full_55, full_55, full_43] + del full_43, full_55, slice_196 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_181 = paddle._C_ops.stack(combine_183, 0) + del combine_183 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x14x14x384xf32, 4xi64) + reshape_345 = paddle._C_ops.reshape(reshape_192, stack_181) + del stack_181 + + # builtin.combine: ([-1x7x7x384xf32, -1x7x7x384xf32, -1x7x7x384xf32, -1x7x7x384xf32]) <- (-1x7x7x384xf32, -1x7x7x384xf32, -1x7x7x384xf32, -1x7x7x384xf32) + combine_184 = [ + strided_slice_8, + strided_slice_9, + strided_slice_10, + strided_slice_11, + ] + + # pd_op.concat: (-1x7x7x1536xf32) <- ([-1x7x7x384xf32, -1x7x7x384xf32, -1x7x7x384xf32, -1x7x7x384xf32], 1xi32) + concat_2 = paddle._C_ops.concat(combine_184, full_2) + del combine_184 + + # pd_op.full: (xi64) <- () + full_71 = paddle._C_ops.full( + [], float("1536"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_185 = [slice_195, full_42, full_71] + del full_42, full_71, slice_195 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_182 = paddle._C_ops.stack(combine_185, 0) + del combine_185 + + # pd_op.reshape: (-1x-1x1536xf32) <- (-1x7x7x1536xf32, 3xi64) + reshape_193 = paddle._C_ops.reshape(concat_2, stack_182) + del stack_182 + + # pd_op.layer_norm: (-1x-1x1536xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x1536xf32, 1536xf32, 1536xf32) + layer_norm_141, layer_norm_142, layer_norm_143 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + reshape_193, parameter_30, parameter_29, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_29, parameter_30 + + # pd_op.matmul: (-1x-1x768xf32) <- (-1x-1x1536xf32, 1536x768xf32) + matmul_112 = paddle._C_ops.matmul(layer_norm_141, parameter_28, False, False) + del parameter_28 + + # pd_op.shape64: (3xi64) <- (-1x-1x768xf32) + shape64_129 = paddle._C_ops.shape64(matmul_112) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_197 = paddle._C_ops.slice( + shape64_129, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_129 + + # pd_op.shape64: (3xi64) <- (-1x-1x768xf32) + shape64_130 = paddle._C_ops.shape64(matmul_112) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_198 = paddle._C_ops.slice( + shape64_130, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + del shape64_130 + + # pd_op.layer_norm: (-1x-1x768xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x768xf32, 768xf32, 768xf32) + layer_norm_144, layer_norm_145, layer_norm_146 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + matmul_112, parameter_27, parameter_26, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_26, parameter_27 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_186 = [slice_197, full_28, full_28, full_54] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_183 = paddle._C_ops.stack(combine_186, 0) + del combine_186 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x-1x768xf32, 4xi64) + reshape_194 = paddle._C_ops.reshape(layer_norm_144, stack_183) + del stack_183 + + # pd_op.shape64: (4xi64) <- (-1x7x7x768xf32) + shape64_131 = paddle._C_ops.shape64(reshape_194) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_199 = paddle._C_ops.slice( + shape64_131, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_131 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_187 = [slice_199, full_39, full_28, full_39, full_28, full_54] + del slice_199 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_184 = paddle._C_ops.stack(combine_187, 0) + del combine_187 + + # pd_op.reshape: (-1x1x7x1x7x768xf32) <- (-1x7x7x768xf32, 6xi64) + reshape_346 = paddle._C_ops.reshape(reshape_194, stack_184) + del stack_184 + + # pd_op.transpose: (-1x1x1x7x7x768xf32) <- (-1x1x7x1x7x768xf32) + transpose_133 = paddle._C_ops.transpose(reshape_346, [0, 1, 3, 2, 4, 5]) + del reshape_346 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_43 = [-1, 7, 7, 768] + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x1x1x7x7x768xf32, 4xi64) + reshape_195 = paddle._C_ops.reshape(transpose_133, full_int_array_43) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_44 = [-1, 49, 768] + + # pd_op.reshape: (-1x49x768xf32) <- (-1x7x7x768xf32, 3xi64) + reshape_196 = paddle._C_ops.reshape(reshape_195, full_int_array_44) + + # pd_op.shape64: (3xi64) <- (-1x49x768xf32) + shape64_132 = paddle._C_ops.shape64(reshape_196) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_200 = paddle._C_ops.slice( + shape64_132, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_132 + + # pd_op.matmul: (-1x49x2304xf32) <- (-1x49x768xf32, 768x2304xf32) + matmul_113 = paddle._C_ops.matmul(reshape_196, parameter_25, False, False) + del parameter_25 + + # pd_op.add: (-1x49x2304xf32) <- (-1x49x2304xf32, 2304xf32) + add_155 = paddle._C_ops.add(matmul_113, parameter_24) + del parameter_24 + + # pd_op.full: (xi64) <- () + full_72 = paddle._C_ops.full( + [], float("24"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_188 = [slice_200, full_29, full_30, full_72, full_31] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_185 = paddle._C_ops.stack(combine_188, 0) + del combine_188 + + # pd_op.reshape: (-1x49x3x24x32xf32) <- (-1x49x2304xf32, 5xi64) + reshape_347 = paddle._C_ops.reshape(add_155, stack_185) + del stack_185 + + # pd_op.transpose: (3x-1x24x49x32xf32) <- (-1x49x3x24x32xf32) + transpose_134 = paddle._C_ops.transpose(reshape_347, [2, 0, 3, 1, 4]) + del reshape_347 + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_201 = paddle._C_ops.slice( + transpose_134, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_202 = paddle._C_ops.slice( + transpose_134, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_22 = paddle._C_ops.slice( + transpose_134, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x24x49x32xf32) <- (-1x24x49x32xf32, 1xf32) + scale_22 = paddle._C_ops.scale(slice_201, full_0, float("0"), True) + del slice_201 + + # pd_op.transpose: (-1x24x32x49xf32) <- (-1x24x49x32xf32) + transpose_135 = paddle._C_ops.transpose(slice_202, [0, 1, 3, 2]) + del slice_202 + + # pd_op.matmul: (-1x24x49x49xf32) <- (-1x24x49x32xf32, -1x24x32x49xf32) + matmul_114 = paddle._C_ops.matmul(scale_22, transpose_135, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_197 = paddle._C_ops.reshape(data_45, full_int_array_12) + del data_45 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_22 = paddle._C_ops.index_select(data_46, reshape_197, 0) + del data_46 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_348 = paddle._C_ops.reshape(index_select_22, full_int_array_13) + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_136 = paddle._C_ops.transpose(reshape_348, [2, 0, 1]) + del reshape_348 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_33 = paddle._C_ops.unsqueeze(transpose_136, full_int_array_7) + + # pd_op.add: (-1x24x49x49xf32) <- (-1x24x49x49xf32, 1x24x49x49xf32) + add_223 = paddle._C_ops.add(matmul_114, unsqueeze_33) + + # pd_op.softmax: (-1x24x49x49xf32) <- (-1x24x49x49xf32) + softmax_22 = paddle._C_ops.softmax(add_223, -1) + del add_223 + + # pd_op.matmul: (-1x24x49x32xf32) <- (-1x24x49x49xf32, -1x24x49x32xf32) + matmul_146 = paddle._C_ops.matmul(softmax_22, slice_22, False, False) + + # pd_op.transpose: (-1x49x24x32xf32) <- (-1x24x49x32xf32) + transpose_137 = paddle._C_ops.transpose(matmul_146, [0, 2, 1, 3]) + del matmul_146 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_189 = [slice_200, full_29, full_54] + del slice_200 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_186 = paddle._C_ops.stack(combine_189, 0) + del combine_189 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x49x24x32xf32, 3xi64) + reshape_198 = paddle._C_ops.reshape(transpose_137, stack_186) + del stack_186 + + # pd_op.matmul: (-1x49x768xf32) <- (-1x49x768xf32, 768x768xf32) + matmul_115 = paddle._C_ops.matmul(reshape_198, parameter_23, False, False) + del parameter_23 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, 768xf32) + add_156 = paddle._C_ops.add(matmul_115, parameter_22) + del parameter_22 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x49x768xf32, 4xi64) + reshape_199 = paddle._C_ops.reshape(add_156, full_int_array_43) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_45 = [-1, 1, 1, 7, 7, 768] + + # pd_op.reshape: (-1x1x1x7x7x768xf32) <- (-1x7x7x768xf32, 6xi64) + reshape_349 = paddle._C_ops.reshape(reshape_199, full_int_array_45) + + # pd_op.transpose: (-1x1x7x1x7x768xf32) <- (-1x1x1x7x7x768xf32) + transpose_138 = paddle._C_ops.transpose(reshape_349, [0, 1, 3, 2, 4, 5]) + del reshape_349 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x1x7x1x7x768xf32, 4xi64) + reshape_200 = paddle._C_ops.reshape(transpose_138, full_int_array_43) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_190 = [slice_197, full_29, full_54] + del slice_197 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_187 = paddle._C_ops.stack(combine_190, 0) + del combine_190 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x7x7x768xf32, 3xi64) + reshape_201 = paddle._C_ops.reshape(reshape_200, stack_187) + del stack_187 + + # pd_op.full: (xf32) <- () + full_23 = paddle._C_ops.full( + [], + float("0.713043"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_257 = full_23 + + # pd_op.shape64: (3xi64) <- (-1x49x768xf32) + shape64_133 = paddle._C_ops.shape64(reshape_201) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_203 = paddle._C_ops.slice( + shape64_133, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_133 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_191 = [slice_203, full_39, full_39] + del slice_203 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_188 = paddle._C_ops.stack(combine_191, 0) + del combine_191 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_42 = paddle._C_ops.uniform( + stack_188, + paddle.float32, + full_40, + full_41, + 0, + paddle.framework._current_expected_place(), + ) + del stack_188 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_224 = paddle._C_ops.add(full_23, uniform_42) + del uniform_42 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_42 = paddle._C_ops.floor(add_224) + del add_224 + + # pd_op.divide: (-1x49x768xf32) <- (-1x49x768xf32, xf32) + divide_42 = paddle._C_ops.divide(reshape_201, full_23) + + # pd_op.multiply: (-1x49x768xf32) <- (-1x49x768xf32, -1x1x1xf32) + multiply_42 = paddle._C_ops.multiply(divide_42, floor_42) + + # pd_op.add: (-1x49x768xf32) <- (-1x-1x768xf32, -1x49x768xf32) + add_157 = paddle._C_ops.add(matmul_112, multiply_42) + + # pd_op.layer_norm: (-1x49x768xf32, -1x49xf32, -1x49xf32) <- (-1x49x768xf32, 768xf32, 768xf32) + layer_norm_147, layer_norm_148, layer_norm_149 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_157, parameter_21, parameter_20, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_20, parameter_21 + + # pd_op.matmul: (-1x49x3072xf32) <- (-1x49x768xf32, 768x3072xf32) + matmul_116 = paddle._C_ops.matmul(layer_norm_147, parameter_19, False, False) + del parameter_19 + + # pd_op.add: (-1x49x3072xf32) <- (-1x49x3072xf32, 3072xf32) + add_158 = paddle._C_ops.add(matmul_116, parameter_18) + del parameter_18 + + # pd_op.gelu: (-1x49x3072xf32) <- (-1x49x3072xf32) + gelu_22 = paddle._C_ops.gelu(add_158, False) + + # pd_op.matmul: (-1x49x768xf32) <- (-1x49x3072xf32, 3072x768xf32) + matmul_117 = paddle._C_ops.matmul(gelu_22, parameter_17, False, False) + del parameter_17 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, 768xf32) + add_159 = paddle._C_ops.add(matmul_117, parameter_16) + del parameter_16 + + # pd_op.shape64: (3xi64) <- (-1x49x768xf32) + shape64_134 = paddle._C_ops.shape64(add_159) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_204 = paddle._C_ops.slice( + shape64_134, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_134 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_192 = [slice_204, full_39, full_39] + del slice_204 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_189 = paddle._C_ops.stack(combine_192, 0) + del combine_192 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_43 = paddle._C_ops.uniform( + stack_189, + paddle.float32, + full_40, + full_41, + 0, + paddle.framework._current_expected_place(), + ) + del stack_189 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_225 = paddle._C_ops.add(full_23, uniform_43) + del uniform_43 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_43 = paddle._C_ops.floor(add_225) + del add_225 + + # pd_op.divide: (-1x49x768xf32) <- (-1x49x768xf32, xf32) + divide_43 = paddle._C_ops.divide(add_159, full_23) + + # pd_op.multiply: (-1x49x768xf32) <- (-1x49x768xf32, -1x1x1xf32) + multiply_43 = paddle._C_ops.multiply(divide_43, floor_43) + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, -1x49x768xf32) + add_160 = paddle._C_ops.add(add_157, multiply_43) + + # pd_op.shape64: (3xi64) <- (-1x49x768xf32) + shape64_135 = paddle._C_ops.shape64(add_160) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_205 = paddle._C_ops.slice( + shape64_135, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_135 + + # pd_op.layer_norm: (-1x49x768xf32, -1x49xf32, -1x49xf32) <- (-1x49x768xf32, 768xf32, 768xf32) + layer_norm_150, layer_norm_151, layer_norm_152 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_160, parameter_15, parameter_14, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_14, parameter_15 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_193 = [slice_205, full_28, full_28, full_54] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_190 = paddle._C_ops.stack(combine_193, 0) + del combine_193 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x49x768xf32, 4xi64) + reshape_202 = paddle._C_ops.reshape(layer_norm_150, stack_190) + del stack_190 + + # pd_op.shape64: (4xi64) <- (-1x7x7x768xf32) + shape64_136 = paddle._C_ops.shape64(reshape_202) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_206 = paddle._C_ops.slice( + shape64_136, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_136 + + # pd_op.roll: (-1x7x7x768xf32) <- (-1x7x7x768xf32, 2xi64) + roll_22 = paddle._C_ops.roll(reshape_202, full_int_array_2, [1, 2]) + + # pd_op.shape64: (4xi64) <- (-1x7x7x768xf32) + shape64_137 = paddle._C_ops.shape64(roll_22) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_207 = paddle._C_ops.slice( + shape64_137, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_137 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_194 = [slice_207, full_39, full_28, full_39, full_28, full_54] + del full_28, slice_207 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_191 = paddle._C_ops.stack(combine_194, 0) + del combine_194 + + # pd_op.reshape: (-1x1x7x1x7x768xf32) <- (-1x7x7x768xf32, 6xi64) + reshape_350 = paddle._C_ops.reshape(roll_22, stack_191) + del stack_191 + + # pd_op.transpose: (-1x1x1x7x7x768xf32) <- (-1x1x7x1x7x768xf32) + transpose_139 = paddle._C_ops.transpose(reshape_350, [0, 1, 3, 2, 4, 5]) + del reshape_350 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x1x1x7x7x768xf32, 4xi64) + reshape_203 = paddle._C_ops.reshape(transpose_139, full_int_array_43) + + # pd_op.reshape: (-1x49x768xf32) <- (-1x7x7x768xf32, 3xi64) + reshape_204 = paddle._C_ops.reshape(reshape_203, full_int_array_44) + del full_int_array_44 + + # pd_op.full: (1x7x7x1xf32) <- () + full_73 = paddle._C_ops.full( + [1, 7, 7, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__100 = paddle._C_ops.set_value_( + full_73, + full_int_array_16, + full_int_array_17, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_73, full_int_array_16 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__101 = paddle._C_ops.set_value_( + set_value__100, + full_int_array_19, + full_int_array_20, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del full_int_array_19, set_value__100 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__102 = paddle._C_ops.set_value_( + set_value__101, + full_int_array_21, + full_int_array_22, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del full_int_array_21, full_int_array_22, set_value__101 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__103 = paddle._C_ops.set_value_( + set_value__102, + full_int_array_23, + full_int_array_24, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del full_int_array_23, set_value__102 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__104 = paddle._C_ops.set_value_( + set_value__103, + full_int_array_17, + full_int_array_2, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del full_int_array_17, set_value__103 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__105 = paddle._C_ops.set_value_( + set_value__104, + full_int_array_20, + full_int_array_25, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del full_int_array_20, full_int_array_25, set_value__104 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__106 = paddle._C_ops.set_value_( + set_value__105, + full_int_array_26, + full_int_array_27, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del full_int_array_26, full_int_array_27, set_value__105 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__107 = paddle._C_ops.set_value_( + set_value__106, + full_int_array_24, + full_int_array_28, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del full_int_array_24, full_int_array_28, set_value__106 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__11 = paddle._C_ops.set_value_( + set_value__107, + full_int_array_2, + full_int_array_29, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del full_int_array_29, set_value__107 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_46 = [1, 1, 7, 1, 7, 1] + + # pd_op.reshape: (1x1x7x1x7x1xf32) <- (1x7x7x1xf32, 6xi64) + reshape_351 = paddle._C_ops.reshape(set_value__11, full_int_array_46) + del full_int_array_46 + + # pd_op.transpose: (1x1x1x7x7x1xf32) <- (1x1x7x1x7x1xf32) + transpose_157 = paddle._C_ops.transpose(reshape_351, [0, 1, 3, 2, 4, 5]) + del reshape_351 + + # pd_op.reshape: (1x7x7x1xf32) <- (1x1x1x7x7x1xf32, 4xi64) + reshape_352 = paddle._C_ops.reshape(transpose_157, full_int_array_31) + del full_int_array_31, transpose_157 + + # pd_op.reshape: (1x49xf32) <- (1x7x7x1xf32, 2xi64) + reshape_353 = paddle._C_ops.reshape(reshape_352, full_int_array_32) + del full_int_array_32, reshape_352 + + # pd_op.unsqueeze: (1x1x49xf32) <- (1x49xf32, 1xi64) + unsqueeze_70 = paddle._C_ops.unsqueeze(reshape_353, full_int_array_8) + + # pd_op.unsqueeze: (1x49x1xf32) <- (1x49xf32, 1xi64) + unsqueeze_71 = paddle._C_ops.unsqueeze(reshape_353, full_int_array_0) + del reshape_353 + + # pd_op.subtract: (1x49x49xf32) <- (1x1x49xf32, 1x49x1xf32) + subtract_11 = paddle._C_ops.subtract(unsqueeze_70, unsqueeze_71) + del unsqueeze_70, unsqueeze_71 + + # pd_op.not_equal: (1x49x49xb) <- (1x49x49xf32, xf32) + not_equal_11 = paddle._C_ops.not_equal(subtract_11, full_34) + + # pd_op.full: (1x49x49xf32) <- () + full_74 = paddle._C_ops.full( + [1, 49, 49], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (1x49x49xf32) <- (1x49x49xb, 1x49x49xf32, 1x49x49xf32) + where_22 = paddle._C_ops.where(not_equal_11, full_74, subtract_11) + del full_74, not_equal_11, subtract_11 + + # pd_op.equal: (1x49x49xb) <- (1x49x49xf32, xf32) + equal_11 = paddle._C_ops.equal(where_22, full_34) + del full_34 + + # pd_op.full: (1x49x49xf32) <- () + full_75 = paddle._C_ops.full( + [1, 49, 49], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (1x49x49xf32) <- (1x49x49xb, 1x49x49xf32, 1x49x49xf32) + where_23 = paddle._C_ops.where(equal_11, full_75, where_22) + del equal_11, full_75, where_22 + + # pd_op.shape64: (3xi64) <- (-1x49x768xf32) + shape64_138 = paddle._C_ops.shape64(reshape_204) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_208 = paddle._C_ops.slice( + shape64_138, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_138 + + # pd_op.matmul: (-1x49x2304xf32) <- (-1x49x768xf32, 768x2304xf32) + matmul_118 = paddle._C_ops.matmul(reshape_204, parameter_13, False, False) + del parameter_13 + + # pd_op.add: (-1x49x2304xf32) <- (-1x49x2304xf32, 2304xf32) + add_161 = paddle._C_ops.add(matmul_118, parameter_12) + del parameter_12 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_195 = [slice_208, full_29, full_30, full_72, full_31] + del full_30, full_31 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_192 = paddle._C_ops.stack(combine_195, 0) + del combine_195 + + # pd_op.reshape: (-1x49x3x24x32xf32) <- (-1x49x2304xf32, 5xi64) + reshape_354 = paddle._C_ops.reshape(add_161, stack_192) + del stack_192 + + # pd_op.transpose: (3x-1x24x49x32xf32) <- (-1x49x3x24x32xf32) + transpose_140 = paddle._C_ops.transpose(reshape_354, [2, 0, 3, 1, 4]) + del reshape_354 + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_209 = paddle._C_ops.slice( + transpose_140, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_210 = paddle._C_ops.slice( + transpose_140, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_23 = paddle._C_ops.slice( + transpose_140, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x24x49x32xf32) <- (-1x24x49x32xf32, 1xf32) + scale_23 = paddle._C_ops.scale(slice_209, full_0, float("0"), True) + del slice_209 + + # pd_op.transpose: (-1x24x32x49xf32) <- (-1x24x49x32xf32) + transpose_141 = paddle._C_ops.transpose(slice_210, [0, 1, 3, 2]) + del slice_210 + + # pd_op.matmul: (-1x24x49x49xf32) <- (-1x24x49x32xf32, -1x24x32x49xf32) + matmul_119 = paddle._C_ops.matmul(scale_23, transpose_141, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_205 = paddle._C_ops.reshape(data_47, full_int_array_12) + del data_47, full_int_array_12 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_23 = paddle._C_ops.index_select(data_48, reshape_205, 0) + del data_48 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_355 = paddle._C_ops.reshape(index_select_23, full_int_array_13) + del full_int_array_13 + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_142 = paddle._C_ops.transpose(reshape_355, [2, 0, 1]) + del reshape_355 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_34 = paddle._C_ops.unsqueeze(transpose_142, full_int_array_7) + + # pd_op.add: (-1x24x49x49xf32) <- (-1x24x49x49xf32, 1x24x49x49xf32) + add_162 = paddle._C_ops.add(matmul_119, unsqueeze_34) + + # pd_op.full: (xi64) <- () + full_76 = paddle._C_ops.full( + [], float("1"), paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_11 = paddle._C_ops.floor_divide(slice_208, full_76) + del full_76 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_196 = [floor_divide_11, full_39, full_72, full_29, full_29] + del floor_divide_11 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_193 = paddle._C_ops.stack(combine_196, 0) + del combine_196 + + # pd_op.reshape: (-1x1x24x49x49xf32) <- (-1x24x49x49xf32, 5xi64) + reshape_206 = paddle._C_ops.reshape(add_162, stack_193) + del stack_193 + + # pd_op.unsqueeze: (1x1x49x49xf32) <- (1x49x49xf32, 1xi64) + unsqueeze_72 = paddle._C_ops.unsqueeze(where_23, full_int_array_8) + del where_23 + + # pd_op.unsqueeze: (1x1x1x49x49xf32) <- (1x1x49x49xf32, 1xi64) + unsqueeze_35 = paddle._C_ops.unsqueeze(unsqueeze_72, full_int_array_7) + del unsqueeze_72 + + # pd_op.add: (-1x1x24x49x49xf32) <- (-1x1x24x49x49xf32, 1x1x1x49x49xf32) + add_163 = paddle._C_ops.add(reshape_206, unsqueeze_35) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_197 = [slice_208, full_72, full_29, full_29] + del full_72 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_194 = paddle._C_ops.stack(combine_197, 0) + del combine_197 + + # pd_op.reshape: (-1x24x49x49xf32) <- (-1x1x24x49x49xf32, 4xi64) + reshape_356 = paddle._C_ops.reshape(add_163, stack_194) + del stack_194 + + # pd_op.softmax: (-1x24x49x49xf32) <- (-1x24x49x49xf32) + softmax_23 = paddle._C_ops.softmax(reshape_356, -1) + del reshape_356 + + # pd_op.matmul: (-1x24x49x32xf32) <- (-1x24x49x49xf32, -1x24x49x32xf32) + matmul_147 = paddle._C_ops.matmul(softmax_23, slice_23, False, False) + + # pd_op.transpose: (-1x49x24x32xf32) <- (-1x24x49x32xf32) + transpose_143 = paddle._C_ops.transpose(matmul_147, [0, 2, 1, 3]) + del matmul_147 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_198 = [slice_208, full_29, full_54] + del slice_208 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_195 = paddle._C_ops.stack(combine_198, 0) + del combine_198 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x49x24x32xf32, 3xi64) + reshape_207 = paddle._C_ops.reshape(transpose_143, stack_195) + del stack_195 + + # pd_op.matmul: (-1x49x768xf32) <- (-1x49x768xf32, 768x768xf32) + matmul_120 = paddle._C_ops.matmul(reshape_207, parameter_11, False, False) + del parameter_11 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, 768xf32) + add_164 = paddle._C_ops.add(matmul_120, parameter_10) + del parameter_10 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x49x768xf32, 4xi64) + reshape_208 = paddle._C_ops.reshape(add_164, full_int_array_43) + + # pd_op.reshape: (-1x1x1x7x7x768xf32) <- (-1x7x7x768xf32, 6xi64) + reshape_357 = paddle._C_ops.reshape(reshape_208, full_int_array_45) + del full_int_array_45 + + # pd_op.transpose: (-1x1x7x1x7x768xf32) <- (-1x1x1x7x7x768xf32) + transpose_144 = paddle._C_ops.transpose(reshape_357, [0, 1, 3, 2, 4, 5]) + del reshape_357 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x1x7x1x7x768xf32, 4xi64) + reshape_209 = paddle._C_ops.reshape(transpose_144, full_int_array_43) + del full_int_array_43 + + # pd_op.roll: (-1x7x7x768xf32) <- (-1x7x7x768xf32, 2xi64) + roll_23 = paddle._C_ops.roll(reshape_209, full_int_array_3, [1, 2]) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_199 = [slice_205, full_29, full_54] + del full_29, full_54, slice_205 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_196 = paddle._C_ops.stack(combine_199, 0) + del combine_199 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x7x7x768xf32, 3xi64) + reshape_210 = paddle._C_ops.reshape(roll_23, stack_196) + del stack_196 + + # pd_op.full: (xf32) <- () + full_24 = paddle._C_ops.full( + [], float("0.7"), paddle.float32, paddle.framework._current_expected_place() + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_268 = full_24 + + # pd_op.shape64: (3xi64) <- (-1x49x768xf32) + shape64_139 = paddle._C_ops.shape64(reshape_210) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_211 = paddle._C_ops.slice( + shape64_139, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_139 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_200 = [slice_211, full_39, full_39] + del slice_211 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_197 = paddle._C_ops.stack(combine_200, 0) + del combine_200 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_44 = paddle._C_ops.uniform( + stack_197, + paddle.float32, + full_40, + full_41, + 0, + paddle.framework._current_expected_place(), + ) + del stack_197 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_226 = paddle._C_ops.add(full_24, uniform_44) + del uniform_44 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_44 = paddle._C_ops.floor(add_226) + del add_226 + + # pd_op.divide: (-1x49x768xf32) <- (-1x49x768xf32, xf32) + divide_44 = paddle._C_ops.divide(reshape_210, full_24) + + # pd_op.multiply: (-1x49x768xf32) <- (-1x49x768xf32, -1x1x1xf32) + multiply_44 = paddle._C_ops.multiply(divide_44, floor_44) + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, -1x49x768xf32) + add_165 = paddle._C_ops.add(add_160, multiply_44) + + # pd_op.layer_norm: (-1x49x768xf32, -1x49xf32, -1x49xf32) <- (-1x49x768xf32, 768xf32, 768xf32) + layer_norm_153, layer_norm_154, layer_norm_155 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_165, parameter_9, parameter_8, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_8, parameter_9 + + # pd_op.matmul: (-1x49x3072xf32) <- (-1x49x768xf32, 768x3072xf32) + matmul_121 = paddle._C_ops.matmul(layer_norm_153, parameter_7, False, False) + del parameter_7 + + # pd_op.add: (-1x49x3072xf32) <- (-1x49x3072xf32, 3072xf32) + add_166 = paddle._C_ops.add(matmul_121, parameter_6) + del parameter_6 + + # pd_op.gelu: (-1x49x3072xf32) <- (-1x49x3072xf32) + gelu_23 = paddle._C_ops.gelu(add_166, False) + + # pd_op.matmul: (-1x49x768xf32) <- (-1x49x3072xf32, 3072x768xf32) + matmul_122 = paddle._C_ops.matmul(gelu_23, parameter_5, False, False) + del parameter_5 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, 768xf32) + add_167 = paddle._C_ops.add(matmul_122, parameter_4) + del parameter_4 + + # pd_op.shape64: (3xi64) <- (-1x49x768xf32) + shape64_140 = paddle._C_ops.shape64(add_167) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_212 = paddle._C_ops.slice( + shape64_140, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del full_int_array_7, full_int_array_8, shape64_140 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_201 = [slice_212, full_39, full_39] + del full_39, slice_212 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_198 = paddle._C_ops.stack(combine_201, 0) + del combine_201 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_45 = paddle._C_ops.uniform( + stack_198, + paddle.float32, + full_40, + full_41, + 0, + paddle.framework._current_expected_place(), + ) + del full_40, full_41, stack_198 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_227 = paddle._C_ops.add(full_24, uniform_45) + del uniform_45 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_45 = paddle._C_ops.floor(add_227) + del add_227 + + # pd_op.divide: (-1x49x768xf32) <- (-1x49x768xf32, xf32) + divide_45 = paddle._C_ops.divide(add_167, full_24) + + # pd_op.multiply: (-1x49x768xf32) <- (-1x49x768xf32, -1x1x1xf32) + multiply_45 = paddle._C_ops.multiply(divide_45, floor_45) + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, -1x49x768xf32) + add_168 = paddle._C_ops.add(add_165, multiply_45) + + # pd_op.layer_norm: (-1x49x768xf32, -1x49xf32, -1x49xf32) <- (-1x49x768xf32, 768xf32, 768xf32) + layer_norm_158, layer_norm_156, layer_norm_157 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_168, parameter_3, parameter_2, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_2, parameter_3 + + # pd_op.transpose: (-1x768x49xf32) <- (-1x49x768xf32) + transpose_145 = paddle._C_ops.transpose(layer_norm_158, [0, 2, 1]) + del layer_norm_158 + + # pd_op.unsqueeze: (-1x768x1x49xf32) <- (-1x768x49xf32, 1xi64) + unsqueeze_36 = paddle._C_ops.unsqueeze(transpose_145, full_int_array_0) + + # pd_op.pool2d: (-1x768x1x1xf32) <- (-1x768x1x49xf32, 2xi64) + pool2d_0 = paddle._C_ops.pool2d( + unsqueeze_36, + full_int_array_18, + [1, 1], + [0, 0], + False, + True, + "NCHW", + "avg", + False, + True, + "EXPLICIT", + ) + del full_int_array_18 + + # pd_op.squeeze: (-1x768x1xf32) <- (-1x768x1x1xf32, 1xi64) + squeeze_0 = paddle._C_ops.squeeze(pool2d_0, full_int_array_0) + + # pd_op.flatten: (-1x768xf32) <- (-1x768x1xf32) + flatten_0 = paddle._C_ops.flatten(squeeze_0, 1, 2) + + # pd_op.matmul: (-1x102xf32) <- (-1x768xf32, 768x102xf32) + matmul_123 = paddle._C_ops.matmul(flatten_0, parameter_1, False, False) + del parameter_1 + + # pd_op.add: (-1x102xf32) <- (-1x102xf32, 102xf32) + add_169 = paddle._C_ops.add(matmul_123, parameter_0) + del ( + assign_0, + assign_1, + assign_10, + assign_100, + assign_101, + assign_103, + assign_105, + assign_106, + assign_107, + assign_108, + assign_109, + assign_110, + assign_111, + assign_113, + assign_114, + assign_116, + assign_117, + assign_118, + assign_119, + assign_12, + assign_120, + assign_121, + assign_123, + assign_125, + assign_126, + assign_127, + assign_128, + assign_129, + assign_130, + assign_131, + assign_133, + assign_134, + assign_136, + assign_137, + assign_138, + assign_139, + assign_14, + assign_140, + assign_141, + assign_143, + assign_145, + assign_146, + assign_147, + assign_148, + assign_149, + assign_15, + assign_150, + assign_151, + assign_153, + assign_154, + assign_156, + assign_157, + assign_158, + assign_159, + assign_16, + assign_160, + assign_161, + assign_163, + assign_165, + assign_166, + assign_167, + assign_168, + assign_169, + assign_17, + assign_170, + assign_171, + assign_173, + assign_174, + assign_176, + assign_177, + assign_178, + assign_179, + assign_18, + assign_180, + assign_181, + assign_183, + assign_185, + assign_186, + assign_187, + assign_188, + assign_189, + assign_19, + assign_190, + assign_191, + assign_193, + assign_194, + assign_196, + assign_197, + assign_198, + assign_199, + assign_2, + assign_20, + assign_200, + assign_201, + assign_203, + assign_205, + assign_206, + assign_207, + assign_208, + assign_209, + assign_21, + assign_210, + assign_211, + assign_213, + assign_214, + assign_216, + assign_217, + assign_218, + assign_219, + assign_22, + assign_220, + assign_221, + assign_223, + assign_225, + assign_226, + assign_227, + assign_228, + assign_229, + assign_23, + assign_230, + assign_231, + assign_233, + assign_234, + assign_236, + assign_237, + assign_238, + assign_239, + assign_24, + assign_240, + assign_241, + assign_242, + assign_243, + assign_244, + assign_245, + assign_246, + assign_247, + assign_249, + assign_25, + assign_250, + assign_251, + assign_252, + assign_253, + assign_254, + assign_256, + assign_258, + assign_259, + assign_26, + assign_260, + assign_261, + assign_262, + assign_263, + assign_264, + assign_266, + assign_267, + assign_269, + assign_27, + assign_270, + assign_271, + assign_28, + assign_3, + assign_30, + assign_32, + assign_33, + assign_34, + assign_35, + assign_36, + assign_37, + assign_38, + assign_4, + assign_40, + assign_41, + assign_43, + assign_44, + assign_45, + assign_46, + assign_47, + assign_48, + assign_49, + assign_5, + assign_50, + assign_51, + assign_52, + assign_53, + assign_54, + assign_56, + assign_57, + assign_58, + assign_59, + assign_6, + assign_60, + assign_61, + assign_63, + assign_65, + assign_66, + assign_67, + assign_68, + assign_69, + assign_7, + assign_70, + assign_71, + assign_73, + assign_74, + assign_76, + assign_77, + assign_78, + assign_79, + assign_8, + assign_80, + assign_81, + assign_83, + assign_85, + assign_86, + assign_87, + assign_88, + assign_89, + assign_9, + assign_90, + assign_91, + assign_93, + assign_94, + assign_96, + assign_97, + assign_98, + assign_99, + full_int_array_0, + full_int_array_1, + full_int_array_2, + full_int_array_3, + full_int_array_4, + full_int_array_5, + full_int_array_6, + parameter_0, + ) + + return ( + conv2d_0, + reshape_0, + add_0, + transpose_0, + layer_norm_0, + layer_norm_1, + layer_norm_2, + layer_norm_3, + layer_norm_4, + layer_norm_5, + reshape_1, + transpose_1, + reshape_2, + reshape_3, + matmul_0, + add_1, + transpose_2, + slice_0, + full_0, + scale_0, + transpose_3, + matmul_1, + reshape_4, + index_select_0, + transpose_4, + unsqueeze_0, + softmax_0, + transpose_5, + reshape_5, + matmul_2, + add_2, + reshape_6, + transpose_6, + reshape_7, + reshape_8, + add_3, + layer_norm_6, + layer_norm_7, + layer_norm_8, + matmul_3, + add_4, + gelu_0, + matmul_4, + add_5, + add_6, + layer_norm_9, + layer_norm_10, + layer_norm_11, + reshape_9, + roll_0, + transpose_7, + reshape_10, + reshape_11, + matmul_5, + add_7, + transpose_8, + slice_1, + assign_11, + scale_1, + transpose_9, + matmul_6, + reshape_12, + index_select_1, + transpose_10, + unsqueeze_1, + add_8, + reshape_13, + unsqueeze_2, + add_9, + softmax_1, + transpose_11, + reshape_14, + matmul_7, + add_10, + reshape_15, + transpose_12, + reshape_16, + roll_1, + reshape_17, + full_1, + floor_0, + divide_0, + multiply_0, + add_11, + layer_norm_12, + layer_norm_13, + layer_norm_14, + matmul_8, + add_12, + gelu_1, + matmul_9, + add_13, + assign_13, + floor_1, + divide_1, + multiply_1, + add_14, + reshape_18, + strided_slice_0, + strided_slice_1, + strided_slice_2, + strided_slice_3, + full_2, + concat_0, + reshape_19, + layer_norm_15, + layer_norm_16, + layer_norm_17, + matmul_10, + layer_norm_18, + layer_norm_19, + layer_norm_20, + reshape_20, + transpose_13, + reshape_21, + reshape_22, + matmul_11, + add_15, + transpose_14, + slice_2, + assign_29, + scale_2, + transpose_15, + matmul_12, + reshape_23, + index_select_2, + transpose_16, + unsqueeze_3, + softmax_2, + transpose_17, + reshape_24, + matmul_13, + add_16, + reshape_25, + transpose_18, + reshape_26, + reshape_27, + full_3, + floor_2, + divide_2, + multiply_2, + add_17, + layer_norm_21, + layer_norm_22, + layer_norm_23, + matmul_14, + add_18, + gelu_2, + matmul_15, + add_19, + assign_31, + floor_3, + divide_3, + multiply_3, + add_20, + layer_norm_24, + layer_norm_25, + layer_norm_26, + reshape_28, + roll_2, + transpose_19, + reshape_29, + reshape_30, + matmul_16, + add_21, + transpose_20, + slice_3, + assign_39, + scale_3, + transpose_21, + matmul_17, + reshape_31, + index_select_3, + transpose_22, + unsqueeze_4, + add_22, + reshape_32, + unsqueeze_5, + add_23, + softmax_3, + transpose_23, + reshape_33, + matmul_18, + add_24, + reshape_34, + transpose_24, + reshape_35, + roll_3, + reshape_36, + full_4, + floor_4, + divide_4, + multiply_4, + add_25, + layer_norm_27, + layer_norm_28, + layer_norm_29, + matmul_19, + add_26, + gelu_3, + matmul_20, + add_27, + assign_42, + floor_5, + divide_5, + multiply_5, + add_28, + reshape_37, + strided_slice_4, + strided_slice_5, + strided_slice_6, + strided_slice_7, + assign_55, + concat_1, + reshape_38, + layer_norm_30, + layer_norm_31, + layer_norm_32, + matmul_21, + layer_norm_33, + layer_norm_34, + layer_norm_35, + reshape_39, + transpose_25, + reshape_40, + reshape_41, + matmul_22, + add_29, + transpose_26, + slice_4, + assign_62, + scale_4, + transpose_27, + matmul_23, + reshape_42, + index_select_4, + transpose_28, + unsqueeze_6, + softmax_4, + transpose_29, + reshape_43, + matmul_24, + add_30, + reshape_44, + transpose_30, + reshape_45, + reshape_46, + full_5, + floor_6, + divide_6, + multiply_6, + add_31, + layer_norm_36, + layer_norm_37, + layer_norm_38, + matmul_25, + add_32, + gelu_4, + matmul_26, + add_33, + assign_64, + floor_7, + divide_7, + multiply_7, + add_34, + layer_norm_39, + layer_norm_40, + layer_norm_41, + reshape_47, + roll_4, + transpose_31, + reshape_48, + reshape_49, + matmul_27, + add_35, + transpose_32, + slice_5, + assign_72, + scale_5, + transpose_33, + matmul_28, + reshape_50, + index_select_5, + transpose_34, + unsqueeze_7, + add_36, + reshape_51, + unsqueeze_8, + add_37, + softmax_5, + transpose_35, + reshape_52, + matmul_29, + add_38, + reshape_53, + transpose_36, + reshape_54, + roll_5, + reshape_55, + full_6, + floor_8, + divide_8, + multiply_8, + add_39, + layer_norm_42, + layer_norm_43, + layer_norm_44, + matmul_30, + add_40, + gelu_5, + matmul_31, + add_41, + assign_75, + floor_9, + divide_9, + multiply_9, + add_42, + layer_norm_45, + layer_norm_46, + layer_norm_47, + reshape_56, + transpose_37, + reshape_57, + reshape_58, + matmul_32, + add_43, + transpose_38, + slice_6, + assign_82, + scale_6, + transpose_39, + matmul_33, + reshape_59, + index_select_6, + transpose_40, + unsqueeze_9, + softmax_6, + transpose_41, + reshape_60, + matmul_34, + add_44, + reshape_61, + transpose_42, + reshape_62, + reshape_63, + full_7, + floor_10, + divide_10, + multiply_10, + add_45, + layer_norm_48, + layer_norm_49, + layer_norm_50, + matmul_35, + add_46, + gelu_6, + matmul_36, + add_47, + assign_84, + floor_11, + divide_11, + multiply_11, + add_48, + layer_norm_51, + layer_norm_52, + layer_norm_53, + reshape_64, + roll_6, + transpose_43, + reshape_65, + reshape_66, + matmul_37, + add_49, + transpose_44, + slice_7, + assign_92, + scale_7, + transpose_45, + matmul_38, + reshape_67, + index_select_7, + transpose_46, + unsqueeze_10, + add_50, + reshape_68, + unsqueeze_11, + add_51, + softmax_7, + transpose_47, + reshape_69, + matmul_39, + add_52, + reshape_70, + transpose_48, + reshape_71, + roll_7, + reshape_72, + full_8, + floor_12, + divide_12, + multiply_12, + add_53, + layer_norm_54, + layer_norm_55, + layer_norm_56, + matmul_40, + add_54, + gelu_7, + matmul_41, + add_55, + assign_95, + floor_13, + divide_13, + multiply_13, + add_56, + layer_norm_57, + layer_norm_58, + layer_norm_59, + reshape_73, + transpose_49, + reshape_74, + reshape_75, + matmul_42, + add_57, + transpose_50, + slice_8, + assign_102, + scale_8, + transpose_51, + matmul_43, + reshape_76, + index_select_8, + transpose_52, + unsqueeze_12, + softmax_8, + transpose_53, + reshape_77, + matmul_44, + add_58, + reshape_78, + transpose_54, + reshape_79, + reshape_80, + full_9, + floor_14, + divide_14, + multiply_14, + add_59, + layer_norm_60, + layer_norm_61, + layer_norm_62, + matmul_45, + add_60, + gelu_8, + matmul_46, + add_61, + assign_104, + floor_15, + divide_15, + multiply_15, + add_62, + layer_norm_63, + layer_norm_64, + layer_norm_65, + reshape_81, + roll_8, + transpose_55, + reshape_82, + reshape_83, + matmul_47, + add_63, + transpose_56, + slice_9, + assign_112, + scale_9, + transpose_57, + matmul_48, + reshape_84, + index_select_9, + transpose_58, + unsqueeze_13, + add_64, + reshape_85, + unsqueeze_14, + add_65, + softmax_9, + transpose_59, + reshape_86, + matmul_49, + add_66, + reshape_87, + transpose_60, + reshape_88, + roll_9, + reshape_89, + full_10, + floor_16, + divide_16, + multiply_16, + add_67, + layer_norm_66, + layer_norm_67, + layer_norm_68, + matmul_50, + add_68, + gelu_9, + matmul_51, + add_69, + assign_115, + floor_17, + divide_17, + multiply_17, + add_70, + layer_norm_69, + layer_norm_70, + layer_norm_71, + reshape_90, + transpose_61, + reshape_91, + reshape_92, + matmul_52, + add_71, + transpose_62, + slice_10, + assign_122, + scale_10, + transpose_63, + matmul_53, + reshape_93, + index_select_10, + transpose_64, + unsqueeze_15, + softmax_10, + transpose_65, + reshape_94, + matmul_54, + add_72, + reshape_95, + transpose_66, + reshape_96, + reshape_97, + full_11, + floor_18, + divide_18, + multiply_18, + add_73, + layer_norm_72, + layer_norm_73, + layer_norm_74, + matmul_55, + add_74, + gelu_10, + matmul_56, + add_75, + assign_124, + floor_19, + divide_19, + multiply_19, + add_76, + layer_norm_75, + layer_norm_76, + layer_norm_77, + reshape_98, + roll_10, + transpose_67, + reshape_99, + reshape_100, + matmul_57, + add_77, + transpose_68, + slice_11, + assign_132, + scale_11, + transpose_69, + matmul_58, + reshape_101, + index_select_11, + transpose_70, + unsqueeze_16, + add_78, + reshape_102, + unsqueeze_17, + add_79, + softmax_11, + transpose_71, + reshape_103, + matmul_59, + add_80, + reshape_104, + transpose_72, + reshape_105, + roll_11, + reshape_106, + full_12, + floor_20, + divide_20, + multiply_20, + add_81, + layer_norm_78, + layer_norm_79, + layer_norm_80, + matmul_60, + add_82, + gelu_11, + matmul_61, + add_83, + assign_135, + floor_21, + divide_21, + multiply_21, + add_84, + layer_norm_81, + layer_norm_82, + layer_norm_83, + reshape_107, + transpose_73, + reshape_108, + reshape_109, + matmul_62, + add_85, + transpose_74, + slice_12, + assign_142, + scale_12, + transpose_75, + matmul_63, + reshape_110, + index_select_12, + transpose_76, + unsqueeze_18, + softmax_12, + transpose_77, + reshape_111, + matmul_64, + add_86, + reshape_112, + transpose_78, + reshape_113, + reshape_114, + full_13, + floor_22, + divide_22, + multiply_22, + add_87, + layer_norm_84, + layer_norm_85, + layer_norm_86, + matmul_65, + add_88, + gelu_12, + matmul_66, + add_89, + assign_144, + floor_23, + divide_23, + multiply_23, + add_90, + layer_norm_87, + layer_norm_88, + layer_norm_89, + reshape_115, + roll_12, + transpose_79, + reshape_116, + reshape_117, + matmul_67, + add_91, + transpose_80, + slice_13, + assign_152, + scale_13, + transpose_81, + matmul_68, + reshape_118, + index_select_13, + transpose_82, + unsqueeze_19, + add_92, + reshape_119, + unsqueeze_20, + add_93, + softmax_13, + transpose_83, + reshape_120, + matmul_69, + add_94, + reshape_121, + transpose_84, + reshape_122, + roll_13, + reshape_123, + full_14, + floor_24, + divide_24, + multiply_24, + add_95, + layer_norm_90, + layer_norm_91, + layer_norm_92, + matmul_70, + add_96, + gelu_13, + matmul_71, + add_97, + assign_155, + floor_25, + divide_25, + multiply_25, + add_98, + layer_norm_93, + layer_norm_94, + layer_norm_95, + reshape_124, + transpose_85, + reshape_125, + reshape_126, + matmul_72, + add_99, + transpose_86, + slice_14, + assign_162, + scale_14, + transpose_87, + matmul_73, + reshape_127, + index_select_14, + transpose_88, + unsqueeze_21, + softmax_14, + transpose_89, + reshape_128, + matmul_74, + add_100, + reshape_129, + transpose_90, + reshape_130, + reshape_131, + full_15, + floor_26, + divide_26, + multiply_26, + add_101, + layer_norm_96, + layer_norm_97, + layer_norm_98, + matmul_75, + add_102, + gelu_14, + matmul_76, + add_103, + assign_164, + floor_27, + divide_27, + multiply_27, + add_104, + layer_norm_99, + layer_norm_100, + layer_norm_101, + reshape_132, + roll_14, + transpose_91, + reshape_133, + reshape_134, + matmul_77, + add_105, + transpose_92, + slice_15, + assign_172, + scale_15, + transpose_93, + matmul_78, + reshape_135, + index_select_15, + transpose_94, + unsqueeze_22, + add_106, + reshape_136, + unsqueeze_23, + add_107, + softmax_15, + transpose_95, + reshape_137, + matmul_79, + add_108, + reshape_138, + transpose_96, + reshape_139, + roll_15, + reshape_140, + full_16, + floor_28, + divide_28, + multiply_28, + add_109, + layer_norm_102, + layer_norm_103, + layer_norm_104, + matmul_80, + add_110, + gelu_15, + matmul_81, + add_111, + assign_175, + floor_29, + divide_29, + multiply_29, + add_112, + layer_norm_105, + layer_norm_106, + layer_norm_107, + reshape_141, + transpose_97, + reshape_142, + reshape_143, + matmul_82, + add_113, + transpose_98, + slice_16, + assign_182, + scale_16, + transpose_99, + matmul_83, + reshape_144, + index_select_16, + transpose_100, + unsqueeze_24, + softmax_16, + transpose_101, + reshape_145, + matmul_84, + add_114, + reshape_146, + transpose_102, + reshape_147, + reshape_148, + full_17, + floor_30, + divide_30, + multiply_30, + add_115, + layer_norm_108, + layer_norm_109, + layer_norm_110, + matmul_85, + add_116, + gelu_16, + matmul_86, + add_117, + assign_184, + floor_31, + divide_31, + multiply_31, + add_118, + layer_norm_111, + layer_norm_112, + layer_norm_113, + reshape_149, + roll_16, + transpose_103, + reshape_150, + reshape_151, + matmul_87, + add_119, + transpose_104, + slice_17, + assign_192, + scale_17, + transpose_105, + matmul_88, + reshape_152, + index_select_17, + transpose_106, + unsqueeze_25, + add_120, + reshape_153, + unsqueeze_26, + add_121, + softmax_17, + transpose_107, + reshape_154, + matmul_89, + add_122, + reshape_155, + transpose_108, + reshape_156, + roll_17, + reshape_157, + full_18, + floor_32, + divide_32, + multiply_32, + add_123, + layer_norm_114, + layer_norm_115, + layer_norm_116, + matmul_90, + add_124, + gelu_17, + matmul_91, + add_125, + assign_195, + floor_33, + divide_33, + multiply_33, + add_126, + layer_norm_117, + layer_norm_118, + layer_norm_119, + reshape_158, + transpose_109, + reshape_159, + reshape_160, + matmul_92, + add_127, + transpose_110, + slice_18, + assign_202, + scale_18, + transpose_111, + matmul_93, + reshape_161, + index_select_18, + transpose_112, + unsqueeze_27, + softmax_18, + transpose_113, + reshape_162, + matmul_94, + add_128, + reshape_163, + transpose_114, + reshape_164, + reshape_165, + full_19, + floor_34, + divide_34, + multiply_34, + add_129, + layer_norm_120, + layer_norm_121, + layer_norm_122, + matmul_95, + add_130, + gelu_18, + matmul_96, + add_131, + assign_204, + floor_35, + divide_35, + multiply_35, + add_132, + layer_norm_123, + layer_norm_124, + layer_norm_125, + reshape_166, + roll_18, + transpose_115, + reshape_167, + reshape_168, + matmul_97, + add_133, + transpose_116, + slice_19, + assign_212, + scale_19, + transpose_117, + matmul_98, + reshape_169, + index_select_19, + transpose_118, + unsqueeze_28, + add_134, + reshape_170, + unsqueeze_29, + add_135, + softmax_19, + transpose_119, + reshape_171, + matmul_99, + add_136, + reshape_172, + transpose_120, + reshape_173, + roll_19, + reshape_174, + full_20, + floor_36, + divide_36, + multiply_36, + add_137, + layer_norm_126, + layer_norm_127, + layer_norm_128, + matmul_100, + add_138, + gelu_19, + matmul_101, + add_139, + assign_215, + floor_37, + divide_37, + multiply_37, + add_140, + layer_norm_129, + layer_norm_130, + layer_norm_131, + reshape_175, + transpose_121, + reshape_176, + reshape_177, + matmul_102, + add_141, + transpose_122, + slice_20, + assign_222, + scale_20, + transpose_123, + matmul_103, + reshape_178, + index_select_20, + transpose_124, + unsqueeze_30, + softmax_20, + transpose_125, + reshape_179, + matmul_104, + add_142, + reshape_180, + transpose_126, + reshape_181, + reshape_182, + full_21, + floor_38, + divide_38, + multiply_38, + add_143, + layer_norm_132, + layer_norm_133, + layer_norm_134, + matmul_105, + add_144, + gelu_20, + matmul_106, + add_145, + assign_224, + floor_39, + divide_39, + multiply_39, + add_146, + layer_norm_135, + layer_norm_136, + layer_norm_137, + reshape_183, + roll_20, + transpose_127, + reshape_184, + reshape_185, + matmul_107, + add_147, + transpose_128, + slice_21, + assign_232, + scale_21, + transpose_129, + matmul_108, + reshape_186, + index_select_21, + transpose_130, + unsqueeze_31, + add_148, + reshape_187, + unsqueeze_32, + add_149, + softmax_21, + transpose_131, + reshape_188, + matmul_109, + add_150, + reshape_189, + transpose_132, + reshape_190, + roll_21, + reshape_191, + full_22, + floor_40, + divide_40, + multiply_40, + add_151, + layer_norm_138, + layer_norm_139, + layer_norm_140, + matmul_110, + add_152, + gelu_21, + matmul_111, + add_153, + assign_235, + floor_41, + divide_41, + multiply_41, + add_154, + reshape_192, + strided_slice_8, + strided_slice_9, + strided_slice_10, + strided_slice_11, + assign_248, + concat_2, + reshape_193, + layer_norm_141, + layer_norm_142, + layer_norm_143, + matmul_112, + layer_norm_144, + layer_norm_145, + layer_norm_146, + reshape_194, + transpose_133, + reshape_195, + reshape_196, + matmul_113, + add_155, + transpose_134, + slice_22, + assign_255, + scale_22, + transpose_135, + matmul_114, + reshape_197, + index_select_22, + transpose_136, + unsqueeze_33, + softmax_22, + transpose_137, + reshape_198, + matmul_115, + add_156, + reshape_199, + transpose_138, + reshape_200, + reshape_201, + full_23, + floor_42, + divide_42, + multiply_42, + add_157, + layer_norm_147, + layer_norm_148, + layer_norm_149, + matmul_116, + add_158, + gelu_22, + matmul_117, + add_159, + assign_257, + floor_43, + divide_43, + multiply_43, + add_160, + layer_norm_150, + layer_norm_151, + layer_norm_152, + reshape_202, + roll_22, + transpose_139, + reshape_203, + reshape_204, + matmul_118, + add_161, + transpose_140, + slice_23, + assign_265, + scale_23, + transpose_141, + matmul_119, + reshape_205, + index_select_23, + transpose_142, + unsqueeze_34, + add_162, + reshape_206, + unsqueeze_35, + add_163, + softmax_23, + transpose_143, + reshape_207, + matmul_120, + add_164, + reshape_208, + transpose_144, + reshape_209, + roll_23, + reshape_210, + full_24, + floor_44, + divide_44, + multiply_44, + add_165, + layer_norm_153, + layer_norm_154, + layer_norm_155, + matmul_121, + add_166, + gelu_23, + matmul_122, + add_167, + assign_268, + floor_45, + divide_45, + multiply_45, + add_168, + layer_norm_156, + layer_norm_157, + transpose_145, + unsqueeze_36, + pool2d_0, + squeeze_0, + flatten_0, + matmul_123, + add_169, + set_value__0, + set_value__1, + set_value__2, + set_value__3, + set_value__4, + set_value__5, + set_value__6, + set_value__7, + set_value__8, + set_value__9, + set_value__10, + set_value__11, + ) diff --git a/paddle_samples/vision-model/SwinTransformer_small_patch4_window7_224/subgraph_2/weight_meta.py b/paddle_samples/vision-model/SwinTransformer_small_patch4_window7_224/subgraph_2/weight_meta.py new file mode 100644 index 00000000..547d11be --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_small_patch4_window7_224/subgraph_2/weight_meta.py @@ -0,0 +1,2743 @@ +class Program_weight_tensor_parameter_0: + name = "parameter_0" + shape = [102] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_1: + name = "parameter_1" + shape = [768, 102] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_2: + name = "parameter_2" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_3: + name = "parameter_3" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_4: + name = "parameter_4" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_5: + name = "parameter_5" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_6: + name = "parameter_6" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_7: + name = "parameter_7" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_8: + name = "parameter_8" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_9: + name = "parameter_9" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_10: + name = "parameter_10" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_11: + name = "parameter_11" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_12: + name = "parameter_12" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_13: + name = "parameter_13" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_14: + name = "parameter_14" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_15: + name = "parameter_15" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_16: + name = "parameter_16" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_17: + name = "parameter_17" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_18: + name = "parameter_18" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_19: + name = "parameter_19" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_20: + name = "parameter_20" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_21: + name = "parameter_21" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_22: + name = "parameter_22" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_23: + name = "parameter_23" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_24: + name = "parameter_24" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_25: + name = "parameter_25" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_26: + name = "parameter_26" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_27: + name = "parameter_27" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_28: + name = "parameter_28" + shape = [1536, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_29: + name = "parameter_29" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_30: + name = "parameter_30" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_31: + name = "parameter_31" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_32: + name = "parameter_32" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_33: + name = "parameter_33" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_34: + name = "parameter_34" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_35: + name = "parameter_35" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_36: + name = "parameter_36" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_37: + name = "parameter_37" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_38: + name = "parameter_38" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_39: + name = "parameter_39" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_40: + name = "parameter_40" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_41: + name = "parameter_41" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_42: + name = "parameter_42" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_43: + name = "parameter_43" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_44: + name = "parameter_44" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_45: + name = "parameter_45" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_46: + name = "parameter_46" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_47: + name = "parameter_47" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_48: + name = "parameter_48" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_49: + name = "parameter_49" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_50: + name = "parameter_50" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_51: + name = "parameter_51" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_52: + name = "parameter_52" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_53: + name = "parameter_53" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_54: + name = "parameter_54" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_55: + name = "parameter_55" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_56: + name = "parameter_56" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_57: + name = "parameter_57" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_58: + name = "parameter_58" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_59: + name = "parameter_59" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_60: + name = "parameter_60" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_61: + name = "parameter_61" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_62: + name = "parameter_62" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_63: + name = "parameter_63" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_64: + name = "parameter_64" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_65: + name = "parameter_65" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_66: + name = "parameter_66" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_67: + name = "parameter_67" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_68: + name = "parameter_68" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_69: + name = "parameter_69" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_70: + name = "parameter_70" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_71: + name = "parameter_71" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_72: + name = "parameter_72" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_73: + name = "parameter_73" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_74: + name = "parameter_74" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_75: + name = "parameter_75" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_76: + name = "parameter_76" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_77: + name = "parameter_77" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_78: + name = "parameter_78" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_79: + name = "parameter_79" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_80: + name = "parameter_80" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_81: + name = "parameter_81" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_82: + name = "parameter_82" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_83: + name = "parameter_83" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_84: + name = "parameter_84" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_85: + name = "parameter_85" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_86: + name = "parameter_86" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_87: + name = "parameter_87" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_88: + name = "parameter_88" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_89: + name = "parameter_89" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_90: + name = "parameter_90" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_91: + name = "parameter_91" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_92: + name = "parameter_92" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_93: + name = "parameter_93" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_94: + name = "parameter_94" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_95: + name = "parameter_95" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_96: + name = "parameter_96" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_97: + name = "parameter_97" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_98: + name = "parameter_98" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_99: + name = "parameter_99" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_100: + name = "parameter_100" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_101: + name = "parameter_101" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_102: + name = "parameter_102" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_103: + name = "parameter_103" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_104: + name = "parameter_104" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_105: + name = "parameter_105" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_106: + name = "parameter_106" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_107: + name = "parameter_107" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_108: + name = "parameter_108" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_109: + name = "parameter_109" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_110: + name = "parameter_110" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_111: + name = "parameter_111" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_112: + name = "parameter_112" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_113: + name = "parameter_113" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_114: + name = "parameter_114" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_115: + name = "parameter_115" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_116: + name = "parameter_116" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_117: + name = "parameter_117" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_118: + name = "parameter_118" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_119: + name = "parameter_119" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_120: + name = "parameter_120" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_121: + name = "parameter_121" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_122: + name = "parameter_122" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_123: + name = "parameter_123" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_124: + name = "parameter_124" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_125: + name = "parameter_125" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_126: + name = "parameter_126" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_127: + name = "parameter_127" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_128: + name = "parameter_128" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_129: + name = "parameter_129" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_130: + name = "parameter_130" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_131: + name = "parameter_131" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_132: + name = "parameter_132" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_133: + name = "parameter_133" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_134: + name = "parameter_134" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_135: + name = "parameter_135" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_136: + name = "parameter_136" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_137: + name = "parameter_137" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_138: + name = "parameter_138" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_139: + name = "parameter_139" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_140: + name = "parameter_140" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_141: + name = "parameter_141" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_142: + name = "parameter_142" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_143: + name = "parameter_143" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_144: + name = "parameter_144" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_145: + name = "parameter_145" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_146: + name = "parameter_146" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_147: + name = "parameter_147" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_148: + name = "parameter_148" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_149: + name = "parameter_149" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_150: + name = "parameter_150" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_151: + name = "parameter_151" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_152: + name = "parameter_152" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_153: + name = "parameter_153" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_154: + name = "parameter_154" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_155: + name = "parameter_155" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_156: + name = "parameter_156" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_157: + name = "parameter_157" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_158: + name = "parameter_158" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_159: + name = "parameter_159" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_160: + name = "parameter_160" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_161: + name = "parameter_161" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_162: + name = "parameter_162" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_163: + name = "parameter_163" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_164: + name = "parameter_164" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_165: + name = "parameter_165" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_166: + name = "parameter_166" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_167: + name = "parameter_167" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_168: + name = "parameter_168" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_169: + name = "parameter_169" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_170: + name = "parameter_170" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_171: + name = "parameter_171" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_172: + name = "parameter_172" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_173: + name = "parameter_173" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_174: + name = "parameter_174" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_175: + name = "parameter_175" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_176: + name = "parameter_176" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_177: + name = "parameter_177" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_178: + name = "parameter_178" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_179: + name = "parameter_179" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_180: + name = "parameter_180" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_181: + name = "parameter_181" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_182: + name = "parameter_182" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_183: + name = "parameter_183" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_184: + name = "parameter_184" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_185: + name = "parameter_185" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_186: + name = "parameter_186" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_187: + name = "parameter_187" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_188: + name = "parameter_188" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_189: + name = "parameter_189" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_190: + name = "parameter_190" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_191: + name = "parameter_191" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_192: + name = "parameter_192" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_193: + name = "parameter_193" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_194: + name = "parameter_194" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_195: + name = "parameter_195" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_196: + name = "parameter_196" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_197: + name = "parameter_197" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_198: + name = "parameter_198" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_199: + name = "parameter_199" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_200: + name = "parameter_200" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_201: + name = "parameter_201" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_202: + name = "parameter_202" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_203: + name = "parameter_203" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_204: + name = "parameter_204" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_205: + name = "parameter_205" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_206: + name = "parameter_206" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_207: + name = "parameter_207" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_208: + name = "parameter_208" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_209: + name = "parameter_209" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_210: + name = "parameter_210" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_211: + name = "parameter_211" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_212: + name = "parameter_212" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_213: + name = "parameter_213" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_214: + name = "parameter_214" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_215: + name = "parameter_215" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_216: + name = "parameter_216" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_217: + name = "parameter_217" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_218: + name = "parameter_218" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_219: + name = "parameter_219" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_220: + name = "parameter_220" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_221: + name = "parameter_221" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_222: + name = "parameter_222" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_223: + name = "parameter_223" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_224: + name = "parameter_224" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_225: + name = "parameter_225" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_226: + name = "parameter_226" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_227: + name = "parameter_227" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_228: + name = "parameter_228" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_229: + name = "parameter_229" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_230: + name = "parameter_230" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_231: + name = "parameter_231" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_232: + name = "parameter_232" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_233: + name = "parameter_233" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_234: + name = "parameter_234" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_235: + name = "parameter_235" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_236: + name = "parameter_236" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_237: + name = "parameter_237" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_238: + name = "parameter_238" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_239: + name = "parameter_239" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_240: + name = "parameter_240" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_241: + name = "parameter_241" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_242: + name = "parameter_242" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_243: + name = "parameter_243" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_244: + name = "parameter_244" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_245: + name = "parameter_245" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_246: + name = "parameter_246" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_247: + name = "parameter_247" + shape = [768, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_248: + name = "parameter_248" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_249: + name = "parameter_249" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_250: + name = "parameter_250" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_251: + name = "parameter_251" + shape = [768, 192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_252: + name = "parameter_252" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_253: + name = "parameter_253" + shape = [192, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_254: + name = "parameter_254" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_255: + name = "parameter_255" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_256: + name = "parameter_256" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_257: + name = "parameter_257" + shape = [192, 192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_258: + name = "parameter_258" + shape = [576] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_259: + name = "parameter_259" + shape = [192, 576] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_260: + name = "parameter_260" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_261: + name = "parameter_261" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_262: + name = "parameter_262" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_263: + name = "parameter_263" + shape = [768, 192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_264: + name = "parameter_264" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_265: + name = "parameter_265" + shape = [192, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_266: + name = "parameter_266" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_267: + name = "parameter_267" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_268: + name = "parameter_268" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_269: + name = "parameter_269" + shape = [192, 192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_270: + name = "parameter_270" + shape = [576] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_271: + name = "parameter_271" + shape = [192, 576] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_272: + name = "parameter_272" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_273: + name = "parameter_273" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_274: + name = "parameter_274" + shape = [384, 192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_275: + name = "parameter_275" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_276: + name = "parameter_276" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_277: + name = "parameter_277" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_278: + name = "parameter_278" + shape = [384, 96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_279: + name = "parameter_279" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_280: + name = "parameter_280" + shape = [96, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_281: + name = "parameter_281" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_282: + name = "parameter_282" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_283: + name = "parameter_283" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_284: + name = "parameter_284" + shape = [96, 96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_285: + name = "parameter_285" + shape = [288] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_286: + name = "parameter_286" + shape = [96, 288] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_287: + name = "parameter_287" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_288: + name = "parameter_288" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_289: + name = "parameter_289" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_290: + name = "parameter_290" + shape = [384, 96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_291: + name = "parameter_291" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_292: + name = "parameter_292" + shape = [96, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_293: + name = "parameter_293" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_294: + name = "parameter_294" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_295: + name = "parameter_295" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_296: + name = "parameter_296" + shape = [96, 96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_297: + name = "parameter_297" + shape = [288] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_298: + name = "parameter_298" + shape = [96, 288] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_299: + name = "parameter_299" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_300: + name = "parameter_300" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_301: + name = "parameter_301" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_302: + name = "parameter_302" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_303: + name = "parameter_303" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_304: + name = "parameter_304" + shape = [96, 3, 4, 4] + dtype = "float32" + low = 0 + high = 0.5 + data = None diff --git a/paddle_samples/vision-model/SwinTransformer_tiny_patch4_window7_224/subgraph_0/graph_hash.txt b/paddle_samples/vision-model/SwinTransformer_tiny_patch4_window7_224/subgraph_0/graph_hash.txt new file mode 100644 index 00000000..74dd08e2 --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_tiny_patch4_window7_224/subgraph_0/graph_hash.txt @@ -0,0 +1 @@ +3b849b190c48ef4072dbe90b9d91b07b0dd58fc0a7b7f5f59ae0f85cec51971d \ No newline at end of file diff --git a/paddle_samples/vision-model/SwinTransformer_tiny_patch4_window7_224/subgraph_0/graph_net.json b/paddle_samples/vision-model/SwinTransformer_tiny_patch4_window7_224/subgraph_0/graph_net.json new file mode 100644 index 00000000..368ac6fc --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_tiny_patch4_window7_224/subgraph_0/graph_net.json @@ -0,0 +1,5 @@ +{ + "framework": "paddle", + "num_devices_required": 1, + "num_nodes_required": 1 +} \ No newline at end of file diff --git a/paddle_samples/vision-model/SwinTransformer_tiny_patch4_window7_224/subgraph_0/input_meta.py b/paddle_samples/vision-model/SwinTransformer_tiny_patch4_window7_224/subgraph_0/input_meta.py new file mode 100644 index 00000000..7f58150a --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_tiny_patch4_window7_224/subgraph_0/input_meta.py @@ -0,0 +1,223 @@ +class Program_weight_tensor_data_0: + name = "data_0" + shape = [64, 3, 224, 224] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_1: + name = "data_1" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_2: + name = "data_2" + shape = [169, 6] + dtype = "float32" + low = -6.35951 + high = 7.23264 + data = None + + +class Program_weight_tensor_data_3: + name = "data_3" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_4: + name = "data_4" + shape = [169, 3] + dtype = "float32" + low = -10.7112 + high = 4.39772 + data = None + + +class Program_weight_tensor_data_5: + name = "data_5" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_6: + name = "data_6" + shape = [169, 6] + dtype = "float32" + low = -9.1058 + high = 5.17221 + data = None + + +class Program_weight_tensor_data_7: + name = "data_7" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_8: + name = "data_8" + shape = [169, 12] + dtype = "float32" + low = -10.2288 + high = 4.45899 + data = None + + +class Program_weight_tensor_data_9: + name = "data_9" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_10: + name = "data_10" + shape = [169, 12] + dtype = "float32" + low = -10.5655 + high = 4.74832 + data = None + + +class Program_weight_tensor_data_11: + name = "data_11" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_12: + name = "data_12" + shape = [169, 12] + dtype = "float32" + low = -11.5152 + high = 3.18727 + data = None + + +class Program_weight_tensor_data_13: + name = "data_13" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_14: + name = "data_14" + shape = [169, 12] + dtype = "float32" + low = -7.50729 + high = 3.69654 + data = None + + +class Program_weight_tensor_data_15: + name = "data_15" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_16: + name = "data_16" + shape = [169, 12] + dtype = "float32" + low = -10.5941 + high = 3.73884 + data = None + + +class Program_weight_tensor_data_17: + name = "data_17" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_18: + name = "data_18" + shape = [169, 12] + dtype = "float32" + low = -13.1412 + high = 3.28064 + data = None + + +class Program_weight_tensor_data_19: + name = "data_19" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_20: + name = "data_20" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_21: + name = "data_21" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_22: + name = "data_22" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_23: + name = "data_23" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_24: + name = "data_24" + shape = [169, 3] + dtype = "float32" + low = -8.27699 + high = 3.6692 + data = None diff --git a/paddle_samples/vision-model/SwinTransformer_tiny_patch4_window7_224/subgraph_0/model.py b/paddle_samples/vision-model/SwinTransformer_tiny_patch4_window7_224/subgraph_0/model.py new file mode 100644 index 00000000..b4af4cc6 --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_tiny_patch4_window7_224/subgraph_0/model.py @@ -0,0 +1,5728 @@ +import paddle + + +class GraphModule(paddle.nn.Layer): + def __init__(self): + super().__init__() + + def forward( + self, + parameter_0, + parameter_1, + parameter_2, + parameter_3, + parameter_4, + parameter_5, + parameter_6, + parameter_7, + parameter_8, + parameter_9, + parameter_10, + parameter_11, + parameter_12, + parameter_13, + parameter_14, + parameter_15, + parameter_16, + parameter_17, + parameter_18, + parameter_19, + parameter_20, + parameter_21, + parameter_22, + parameter_23, + parameter_24, + parameter_25, + parameter_26, + parameter_27, + parameter_28, + parameter_29, + parameter_30, + parameter_31, + parameter_32, + parameter_33, + parameter_34, + parameter_35, + parameter_36, + parameter_37, + parameter_38, + parameter_39, + parameter_40, + parameter_41, + parameter_42, + parameter_43, + parameter_44, + parameter_45, + parameter_46, + parameter_47, + parameter_48, + parameter_49, + parameter_50, + parameter_51, + parameter_52, + parameter_53, + parameter_54, + parameter_55, + parameter_56, + parameter_57, + parameter_58, + parameter_59, + parameter_60, + parameter_61, + parameter_62, + parameter_63, + parameter_64, + parameter_65, + parameter_66, + parameter_67, + parameter_68, + parameter_69, + parameter_70, + parameter_71, + parameter_72, + parameter_73, + parameter_74, + parameter_75, + parameter_76, + parameter_77, + parameter_78, + parameter_79, + parameter_80, + parameter_81, + parameter_82, + parameter_83, + parameter_84, + parameter_85, + parameter_86, + parameter_87, + parameter_88, + parameter_89, + parameter_90, + parameter_91, + parameter_92, + parameter_93, + parameter_94, + parameter_95, + parameter_96, + parameter_97, + parameter_98, + parameter_99, + parameter_100, + parameter_101, + parameter_102, + parameter_103, + parameter_104, + parameter_105, + parameter_106, + parameter_107, + parameter_108, + parameter_109, + parameter_110, + parameter_111, + parameter_112, + parameter_113, + parameter_114, + parameter_115, + parameter_116, + parameter_117, + parameter_118, + parameter_119, + parameter_120, + parameter_121, + parameter_122, + parameter_123, + parameter_124, + parameter_125, + parameter_126, + parameter_127, + parameter_128, + parameter_129, + parameter_130, + parameter_131, + parameter_132, + parameter_133, + parameter_134, + parameter_135, + parameter_136, + parameter_137, + parameter_138, + parameter_139, + parameter_140, + parameter_141, + parameter_142, + parameter_143, + parameter_144, + parameter_145, + parameter_146, + parameter_147, + parameter_148, + parameter_149, + parameter_150, + parameter_151, + parameter_152, + parameter_153, + parameter_154, + parameter_155, + parameter_156, + parameter_157, + parameter_158, + parameter_159, + parameter_160, + data_0, + data_1, + data_2, + data_3, + data_4, + data_5, + data_6, + data_7, + data_8, + data_9, + data_10, + data_11, + data_12, + data_13, + data_14, + data_15, + data_16, + data_17, + data_18, + data_19, + data_20, + data_21, + data_22, + data_23, + data_24, + ): + # pd_op.conv2d: (64x96x56x56xf32) <- (64x3x224x224xf32, 96x3x4x4xf32) + conv2d_0 = paddle._C_ops.conv2d( + data_0, parameter_160, [4, 4], [0, 0], "EXPLICIT", [1, 1], 1, "NCHW" + ) + del data_0, parameter_160 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_9 = [1, -1, 1, 1] + + # pd_op.reshape: (1x96x1x1xf32) <- (96xf32, 4xi64) + reshape_0 = paddle._C_ops.reshape(parameter_159, full_int_array_9) + del full_int_array_9, parameter_159 + + # pd_op.add: (64x96x56x56xf32) <- (64x96x56x56xf32, 1x96x1x1xf32) + add_0 = paddle._C_ops.add(conv2d_0, reshape_0) + + # pd_op.flatten: (64x96x3136xf32) <- (64x96x56x56xf32) + flatten_1 = paddle._C_ops.flatten(add_0, 2, 3) + + # pd_op.transpose: (64x3136x96xf32) <- (64x96x3136xf32) + transpose_0 = paddle._C_ops.transpose(flatten_1, [0, 2, 1]) + del flatten_1 + + # pd_op.layer_norm: (64x3136x96xf32, 64x3136xf32, 64x3136xf32) <- (64x3136x96xf32, 96xf32, 96xf32) + layer_norm_0, layer_norm_1, layer_norm_2 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + transpose_0, parameter_158, parameter_157, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_157, parameter_158 + + # pd_op.layer_norm: (64x3136x96xf32, 64x3136xf32, 64x3136xf32) <- (64x3136x96xf32, 96xf32, 96xf32) + layer_norm_3, layer_norm_4, layer_norm_5 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + layer_norm_0, parameter_156, parameter_155, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_155, parameter_156 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_10 = [64, 56, 56, 96] + + # pd_op.reshape: (64x56x56x96xf32) <- (64x3136x96xf32, 4xi64) + reshape_1 = paddle._C_ops.reshape(layer_norm_3, full_int_array_10) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_11 = [64, 8, 7, 8, 7, 96] + + # pd_op.reshape: (64x8x7x8x7x96xf32) <- (64x56x56x96xf32, 6xi64) + reshape_109 = paddle._C_ops.reshape(reshape_1, full_int_array_11) + + # pd_op.transpose: (64x8x8x7x7x96xf32) <- (64x8x7x8x7x96xf32) + transpose_1 = paddle._C_ops.transpose(reshape_109, [0, 1, 3, 2, 4, 5]) + del reshape_109 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_12 = [-1, 7, 7, 96] + + # pd_op.reshape: (4096x7x7x96xf32) <- (64x8x8x7x7x96xf32, 4xi64) + reshape_2 = paddle._C_ops.reshape(transpose_1, full_int_array_12) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_13 = [-1, 49, 96] + + # pd_op.reshape: (4096x49x96xf32) <- (4096x7x7x96xf32, 3xi64) + reshape_3 = paddle._C_ops.reshape(reshape_2, full_int_array_13) + + # pd_op.matmul: (4096x49x288xf32) <- (4096x49x96xf32, 96x288xf32) + matmul_0 = paddle._C_ops.matmul(reshape_3, parameter_154, False, False) + del parameter_154 + + # pd_op.add: (4096x49x288xf32) <- (4096x49x288xf32, 288xf32) + add_1 = paddle._C_ops.add(matmul_0, parameter_153) + del parameter_153 + + # pd_op.full_int_array: (5xi64) <- () + full_int_array_14 = [4096, 49, 3, 3, 32] + + # pd_op.reshape: (4096x49x3x3x32xf32) <- (4096x49x288xf32, 5xi64) + reshape_110 = paddle._C_ops.reshape(add_1, full_int_array_14) + + # pd_op.transpose: (3x4096x3x49x32xf32) <- (4096x49x3x3x32xf32) + transpose_2 = paddle._C_ops.transpose(reshape_110, [2, 0, 3, 1, 4]) + del reshape_110 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_0 = [0] + + # pd_op.assign: (1xi64) <- (1xi64) + assign_144 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_137 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_134 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_127 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_111 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_104 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_101 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_94 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_91 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_84 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_81 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_74 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_71 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_64 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_61 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_54 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_38 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_31 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_28 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_21 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_10 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_3 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_2 = full_int_array_0 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_1 = [1] + + # pd_op.assign: (1xi64) <- (1xi64) + assign_139 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_138 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_129 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_128 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_106 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_105 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_96 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_95 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_86 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_85 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_76 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_75 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_66 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_65 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_56 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_55 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_33 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_32 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_23 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_22 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_5 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_4 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_0 = full_int_array_1 + + # pd_op.slice: (4096x3x49x32xf32) <- (3x4096x3x49x32xf32, 1xi64, 1xi64) + slice_12 = paddle._C_ops.slice( + transpose_2, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_2 = [2] + + # pd_op.assign: (1xi64) <- (1xi64) + assign_149 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_147 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_141 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_140 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_131 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_130 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_108 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_107 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_98 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_97 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_88 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_87 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_78 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_77 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_68 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_67 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_58 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_57 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_35 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_34 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_25 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_24 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_7 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_6 = full_int_array_2 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_1 = full_int_array_2 + + # pd_op.slice: (4096x3x49x32xf32) <- (3x4096x3x49x32xf32, 1xi64, 1xi64) + slice_13 = paddle._C_ops.slice( + transpose_2, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_3 = [3] + + # pd_op.assign: (1xi64) <- (1xi64) + assign_142 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_132 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_109 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_99 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_89 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_79 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_69 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_59 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_36 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_26 = full_int_array_3 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_8 = full_int_array_3 + + # pd_op.slice: (4096x3x49x32xf32) <- (3x4096x3x49x32xf32, 1xi64, 1xi64) + slice_0 = paddle._C_ops.slice( + transpose_2, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.full: (1xf32) <- () + full_0 = paddle._C_ops.full( + [1], float("0.176777"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.assign: (1xf32) <- (1xf32) + assign_143 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_133 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_110 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_100 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_90 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_80 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_70 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_60 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_37 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_27 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_9 = full_0 + + # pd_op.scale: (4096x3x49x32xf32) <- (4096x3x49x32xf32, 1xf32) + scale_0 = paddle._C_ops.scale(slice_12, full_0, float("0"), True) + del slice_12 + + # pd_op.transpose: (4096x3x32x49xf32) <- (4096x3x49x32xf32) + transpose_3 = paddle._C_ops.transpose(slice_13, [0, 1, 3, 2]) + del slice_13 + + # pd_op.matmul: (4096x3x49x49xf32) <- (4096x3x49x32xf32, 4096x3x32x49xf32) + matmul_1 = paddle._C_ops.matmul(scale_0, transpose_3, False, False) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_15 = [-1] + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_4 = paddle._C_ops.reshape(data_3, full_int_array_15) + del data_3 + + # pd_op.index_select: (2401x3xf32) <- (169x3xf32, 2401xi64) + index_select_0 = paddle._C_ops.index_select(data_4, reshape_4, 0) + del data_4 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_16 = [49, 49, -1] + + # pd_op.reshape: (49x49x3xf32) <- (2401x3xf32, 3xi64) + reshape_111 = paddle._C_ops.reshape(index_select_0, full_int_array_16) + + # pd_op.transpose: (3x49x49xf32) <- (49x49x3xf32) + transpose_4 = paddle._C_ops.transpose(reshape_111, [2, 0, 1]) + del reshape_111 + + # pd_op.unsqueeze: (1x3x49x49xf32) <- (3x49x49xf32, 1xi64) + unsqueeze_0 = paddle._C_ops.unsqueeze(transpose_4, full_int_array_0) + + # pd_op.add: (4096x3x49x49xf32) <- (4096x3x49x49xf32, 1x3x49x49xf32) + add_86 = paddle._C_ops.add(matmul_1, unsqueeze_0) + + # pd_op.softmax: (4096x3x49x49xf32) <- (4096x3x49x49xf32) + softmax_0 = paddle._C_ops.softmax(add_86, -1) + del add_86 + + # pd_op.matmul: (4096x3x49x32xf32) <- (4096x3x49x49xf32, 4096x3x49x32xf32) + matmul_64 = paddle._C_ops.matmul(softmax_0, slice_0, False, False) + + # pd_op.transpose: (4096x49x3x32xf32) <- (4096x3x49x32xf32) + transpose_5 = paddle._C_ops.transpose(matmul_64, [0, 2, 1, 3]) + del matmul_64 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_17 = [4096, 49, 96] + + # pd_op.reshape: (4096x49x96xf32) <- (4096x49x3x32xf32, 3xi64) + reshape_5 = paddle._C_ops.reshape(transpose_5, full_int_array_17) + + # pd_op.matmul: (4096x49x96xf32) <- (4096x49x96xf32, 96x96xf32) + matmul_2 = paddle._C_ops.matmul(reshape_5, parameter_152, False, False) + del parameter_152 + + # pd_op.add: (4096x49x96xf32) <- (4096x49x96xf32, 96xf32) + add_2 = paddle._C_ops.add(matmul_2, parameter_151) + del parameter_151 + + # pd_op.reshape: (4096x7x7x96xf32) <- (4096x49x96xf32, 4xi64) + reshape_6 = paddle._C_ops.reshape(add_2, full_int_array_12) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_18 = [-1, 8, 8, 7, 7, 96] + + # pd_op.reshape: (64x8x8x7x7x96xf32) <- (4096x7x7x96xf32, 6xi64) + reshape_112 = paddle._C_ops.reshape(reshape_6, full_int_array_18) + + # pd_op.transpose: (64x8x7x8x7x96xf32) <- (64x8x8x7x7x96xf32) + transpose_6 = paddle._C_ops.transpose(reshape_112, [0, 1, 3, 2, 4, 5]) + del reshape_112 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_19 = [-1, 56, 56, 96] + + # pd_op.reshape: (64x56x56x96xf32) <- (64x8x7x8x7x96xf32, 4xi64) + reshape_7 = paddle._C_ops.reshape(transpose_6, full_int_array_19) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_20 = [64, 3136, 96] + + # pd_op.reshape: (64x3136x96xf32) <- (64x56x56x96xf32, 3xi64) + reshape_8 = paddle._C_ops.reshape(reshape_7, full_int_array_20) + + # pd_op.add: (64x3136x96xf32) <- (64x3136x96xf32, 64x3136x96xf32) + add_3 = paddle._C_ops.add(layer_norm_0, reshape_8) + + # pd_op.layer_norm: (64x3136x96xf32, 64x3136xf32, 64x3136xf32) <- (64x3136x96xf32, 96xf32, 96xf32) + layer_norm_6, layer_norm_7, layer_norm_8 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_3, parameter_150, parameter_149, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_149, parameter_150 + + # pd_op.matmul: (64x3136x384xf32) <- (64x3136x96xf32, 96x384xf32) + matmul_3 = paddle._C_ops.matmul(layer_norm_6, parameter_148, False, False) + del parameter_148 + + # pd_op.add: (64x3136x384xf32) <- (64x3136x384xf32, 384xf32) + add_4 = paddle._C_ops.add(matmul_3, parameter_147) + del parameter_147 + + # pd_op.gelu: (64x3136x384xf32) <- (64x3136x384xf32) + gelu_0 = paddle._C_ops.gelu(add_4, False) + + # pd_op.matmul: (64x3136x96xf32) <- (64x3136x384xf32, 384x96xf32) + matmul_4 = paddle._C_ops.matmul(gelu_0, parameter_146, False, False) + del parameter_146 + + # pd_op.add: (64x3136x96xf32) <- (64x3136x96xf32, 96xf32) + add_5 = paddle._C_ops.add(matmul_4, parameter_145) + del parameter_145 + + # pd_op.add: (64x3136x96xf32) <- (64x3136x96xf32, 64x3136x96xf32) + add_6 = paddle._C_ops.add(add_3, add_5) + + # pd_op.layer_norm: (64x3136x96xf32, 64x3136xf32, 64x3136xf32) <- (64x3136x96xf32, 96xf32, 96xf32) + layer_norm_9, layer_norm_10, layer_norm_11 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_6, parameter_144, parameter_143, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_143, parameter_144 + + # pd_op.reshape: (64x56x56x96xf32) <- (64x3136x96xf32, 4xi64) + reshape_9 = paddle._C_ops.reshape(layer_norm_9, full_int_array_10) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_4 = [-3, -3] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_136 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_103 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_83 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_63 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_30 = full_int_array_4 + + # pd_op.roll: (64x56x56x96xf32) <- (64x56x56x96xf32, 2xi64) + roll_0 = paddle._C_ops.roll(reshape_9, full_int_array_4, [1, 2]) + + # pd_op.reshape: (64x8x7x8x7x96xf32) <- (64x56x56x96xf32, 6xi64) + reshape_113 = paddle._C_ops.reshape(roll_0, full_int_array_11) + del full_int_array_11 + + # pd_op.transpose: (64x8x8x7x7x96xf32) <- (64x8x7x8x7x96xf32) + transpose_7 = paddle._C_ops.transpose(reshape_113, [0, 1, 3, 2, 4, 5]) + del reshape_113 + + # pd_op.reshape: (4096x7x7x96xf32) <- (64x8x8x7x7x96xf32, 4xi64) + reshape_10 = paddle._C_ops.reshape(transpose_7, full_int_array_12) + + # pd_op.reshape: (4096x49x96xf32) <- (4096x7x7x96xf32, 3xi64) + reshape_11 = paddle._C_ops.reshape(reshape_10, full_int_array_13) + del full_int_array_13 + + # pd_op.full: (1x56x56x1xf32) <- () + full_13 = paddle._C_ops.full( + [1, 56, 56, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_21 = [0, 0] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_114 = full_int_array_21 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_41 = full_int_array_21 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_12 = full_int_array_21 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_22 = [-7, -7] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_23 = [1, 1] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_148 = full_int_array_23 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_123 = full_int_array_23 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_50 = full_int_array_23 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_18 = full_int_array_23 + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__6 = paddle._C_ops.set_value_( + full_13, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_13 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_24 = [0, -7] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_25 = [-7, -3] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__7 = paddle._C_ops.set_value_( + set_value__6, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__6 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_26 = [0, -3] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_27 = [-7, 2147483647] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__8 = paddle._C_ops.set_value_( + set_value__7, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__7 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_28 = [-7, 0] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_29 = [-3, -7] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__9 = paddle._C_ops.set_value_( + set_value__8, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__8 + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__10 = paddle._C_ops.set_value_( + set_value__9, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__9 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_30 = [-3, 2147483647] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__11 = paddle._C_ops.set_value_( + set_value__10, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__10 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_31 = [-3, 0] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_32 = [2147483647, -7] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__12 = paddle._C_ops.set_value_( + set_value__11, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__11 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_33 = [2147483647, -3] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__13 = paddle._C_ops.set_value_( + set_value__12, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__12 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_34 = [2147483647, 2147483647] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_124 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_121 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_118 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_115 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_51 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_48 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_45 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_42 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_19 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_16 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_14 = full_int_array_34 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_13 = full_int_array_34 + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__0 = paddle._C_ops.set_value_( + set_value__13, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__13 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_35 = [1, 8, 7, 8, 7, 1] + + # pd_op.reshape: (1x8x7x8x7x1xf32) <- (1x56x56x1xf32, 6xi64) + reshape_114 = paddle._C_ops.reshape(set_value__0, full_int_array_35) + del full_int_array_35 + + # pd_op.transpose: (1x8x8x7x7x1xf32) <- (1x8x7x8x7x1xf32) + transpose_74 = paddle._C_ops.transpose(reshape_114, [0, 1, 3, 2, 4, 5]) + del reshape_114 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_36 = [-1, 7, 7, 1] + + # pd_op.reshape: (64x7x7x1xf32) <- (1x8x8x7x7x1xf32, 4xi64) + reshape_115 = paddle._C_ops.reshape(transpose_74, full_int_array_36) + del transpose_74 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_37 = [-1, 49] + + # pd_op.reshape: (64x49xf32) <- (64x7x7x1xf32, 2xi64) + reshape_116 = paddle._C_ops.reshape(reshape_115, full_int_array_37) + del reshape_115 + + # pd_op.unsqueeze: (64x1x49xf32) <- (64x49xf32, 1xi64) + unsqueeze_19 = paddle._C_ops.unsqueeze(reshape_116, full_int_array_1) + + # pd_op.unsqueeze: (64x49x1xf32) <- (64x49xf32, 1xi64) + unsqueeze_20 = paddle._C_ops.unsqueeze(reshape_116, full_int_array_2) + del reshape_116 + + # pd_op.subtract: (64x49x49xf32) <- (64x1x49xf32, 64x49x1xf32) + subtract_0 = paddle._C_ops.subtract(unsqueeze_19, unsqueeze_20) + del unsqueeze_19, unsqueeze_20 + + # pd_op.full: (xf32) <- () + full_14 = paddle._C_ops.full( + [], float("0"), paddle.float32, paddle.framework._current_expected_place() + ) + + # pd_op.not_equal: (64x49x49xb) <- (64x49x49xf32, xf32) + not_equal_0 = paddle._C_ops.not_equal(subtract_0, full_14) + + # pd_op.full: (64x49x49xf32) <- () + full_15 = paddle._C_ops.full( + [64, 49, 49], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (64x49x49xf32) <- (64x49x49xb, 64x49x49xf32, 64x49x49xf32) + where_0 = paddle._C_ops.where(not_equal_0, full_15, subtract_0) + del full_15, not_equal_0, subtract_0 + + # pd_op.equal: (64x49x49xb) <- (64x49x49xf32, xf32) + equal_0 = paddle._C_ops.equal(where_0, full_14) + + # pd_op.full: (64x49x49xf32) <- () + full_16 = paddle._C_ops.full( + [64, 49, 49], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (64x49x49xf32) <- (64x49x49xb, 64x49x49xf32, 64x49x49xf32) + where_1 = paddle._C_ops.where(equal_0, full_16, where_0) + del equal_0, full_16, where_0 + + # pd_op.matmul: (4096x49x288xf32) <- (4096x49x96xf32, 96x288xf32) + matmul_5 = paddle._C_ops.matmul(reshape_11, parameter_142, False, False) + del parameter_142 + + # pd_op.add: (4096x49x288xf32) <- (4096x49x288xf32, 288xf32) + add_7 = paddle._C_ops.add(matmul_5, parameter_141) + del parameter_141 + + # pd_op.reshape: (4096x49x3x3x32xf32) <- (4096x49x288xf32, 5xi64) + reshape_117 = paddle._C_ops.reshape(add_7, full_int_array_14) + del full_int_array_14 + + # pd_op.transpose: (3x4096x3x49x32xf32) <- (4096x49x3x3x32xf32) + transpose_8 = paddle._C_ops.transpose(reshape_117, [2, 0, 3, 1, 4]) + del reshape_117 + + # pd_op.slice: (4096x3x49x32xf32) <- (3x4096x3x49x32xf32, 1xi64, 1xi64) + slice_14 = paddle._C_ops.slice( + transpose_8, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (4096x3x49x32xf32) <- (3x4096x3x49x32xf32, 1xi64, 1xi64) + slice_15 = paddle._C_ops.slice( + transpose_8, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (4096x3x49x32xf32) <- (3x4096x3x49x32xf32, 1xi64, 1xi64) + slice_1 = paddle._C_ops.slice( + transpose_8, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (4096x3x49x32xf32) <- (4096x3x49x32xf32, 1xf32) + scale_1 = paddle._C_ops.scale(slice_14, full_0, float("0"), True) + del slice_14 + + # pd_op.transpose: (4096x3x32x49xf32) <- (4096x3x49x32xf32) + transpose_9 = paddle._C_ops.transpose(slice_15, [0, 1, 3, 2]) + del slice_15 + + # pd_op.matmul: (4096x3x49x49xf32) <- (4096x3x49x32xf32, 4096x3x32x49xf32) + matmul_6 = paddle._C_ops.matmul(scale_1, transpose_9, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_12 = paddle._C_ops.reshape(data_23, full_int_array_15) + del data_23 + + # pd_op.index_select: (2401x3xf32) <- (169x3xf32, 2401xi64) + index_select_1 = paddle._C_ops.index_select(data_24, reshape_12, 0) + del data_24 + + # pd_op.reshape: (49x49x3xf32) <- (2401x3xf32, 3xi64) + reshape_118 = paddle._C_ops.reshape(index_select_1, full_int_array_16) + + # pd_op.transpose: (3x49x49xf32) <- (49x49x3xf32) + transpose_10 = paddle._C_ops.transpose(reshape_118, [2, 0, 1]) + del reshape_118 + + # pd_op.unsqueeze: (1x3x49x49xf32) <- (3x49x49xf32, 1xi64) + unsqueeze_1 = paddle._C_ops.unsqueeze(transpose_10, full_int_array_0) + + # pd_op.add: (4096x3x49x49xf32) <- (4096x3x49x49xf32, 1x3x49x49xf32) + add_8 = paddle._C_ops.add(matmul_6, unsqueeze_1) + + # pd_op.full_int_array: (5xi64) <- () + full_int_array_38 = [64, 64, 3, 49, 49] + + # pd_op.reshape: (64x64x3x49x49xf32) <- (4096x3x49x49xf32, 5xi64) + reshape_13 = paddle._C_ops.reshape(add_8, full_int_array_38) + del full_int_array_38 + + # pd_op.unsqueeze: (64x1x49x49xf32) <- (64x49x49xf32, 1xi64) + unsqueeze_21 = paddle._C_ops.unsqueeze(where_1, full_int_array_1) + del where_1 + + # pd_op.unsqueeze: (1x64x1x49x49xf32) <- (64x1x49x49xf32, 1xi64) + unsqueeze_2 = paddle._C_ops.unsqueeze(unsqueeze_21, full_int_array_0) + del unsqueeze_21 + + # pd_op.add: (64x64x3x49x49xf32) <- (64x64x3x49x49xf32, 1x64x1x49x49xf32) + add_9 = paddle._C_ops.add(reshape_13, unsqueeze_2) + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_39 = [4096, 3, 49, 49] + + # pd_op.reshape: (4096x3x49x49xf32) <- (64x64x3x49x49xf32, 4xi64) + reshape_119 = paddle._C_ops.reshape(add_9, full_int_array_39) + del full_int_array_39 + + # pd_op.softmax: (4096x3x49x49xf32) <- (4096x3x49x49xf32) + softmax_1 = paddle._C_ops.softmax(reshape_119, -1) + del reshape_119 + + # pd_op.matmul: (4096x3x49x32xf32) <- (4096x3x49x49xf32, 4096x3x49x32xf32) + matmul_65 = paddle._C_ops.matmul(softmax_1, slice_1, False, False) + + # pd_op.transpose: (4096x49x3x32xf32) <- (4096x3x49x32xf32) + transpose_11 = paddle._C_ops.transpose(matmul_65, [0, 2, 1, 3]) + del matmul_65 + + # pd_op.reshape: (4096x49x96xf32) <- (4096x49x3x32xf32, 3xi64) + reshape_14 = paddle._C_ops.reshape(transpose_11, full_int_array_17) + del full_int_array_17 + + # pd_op.matmul: (4096x49x96xf32) <- (4096x49x96xf32, 96x96xf32) + matmul_7 = paddle._C_ops.matmul(reshape_14, parameter_140, False, False) + del parameter_140 + + # pd_op.add: (4096x49x96xf32) <- (4096x49x96xf32, 96xf32) + add_10 = paddle._C_ops.add(matmul_7, parameter_139) + del parameter_139 + + # pd_op.reshape: (4096x7x7x96xf32) <- (4096x49x96xf32, 4xi64) + reshape_15 = paddle._C_ops.reshape(add_10, full_int_array_12) + del full_int_array_12 + + # pd_op.reshape: (64x8x8x7x7x96xf32) <- (4096x7x7x96xf32, 6xi64) + reshape_120 = paddle._C_ops.reshape(reshape_15, full_int_array_18) + del full_int_array_18 + + # pd_op.transpose: (64x8x7x8x7x96xf32) <- (64x8x8x7x7x96xf32) + transpose_12 = paddle._C_ops.transpose(reshape_120, [0, 1, 3, 2, 4, 5]) + del reshape_120 + + # pd_op.reshape: (64x56x56x96xf32) <- (64x8x7x8x7x96xf32, 4xi64) + reshape_16 = paddle._C_ops.reshape(transpose_12, full_int_array_19) + del full_int_array_19 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_5 = [3, 3] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_145 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_112 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_92 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_72 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_39 = full_int_array_5 + + # pd_op.roll: (64x56x56x96xf32) <- (64x56x56x96xf32, 2xi64) + roll_1 = paddle._C_ops.roll(reshape_16, full_int_array_5, [1, 2]) + + # pd_op.reshape: (64x3136x96xf32) <- (64x56x56x96xf32, 3xi64) + reshape_17 = paddle._C_ops.reshape(roll_1, full_int_array_20) + del full_int_array_20 + + # pd_op.full: (xf32) <- () + full_1 = paddle._C_ops.full( + [], + float("0.981818"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_11 = full_1 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_40 = [64, 1, 1] + + # pd_op.full: (1xf32) <- () + full_17 = paddle._C_ops.full( + [1], float("0"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.full: (1xf32) <- () + full_18 = paddle._C_ops.full( + [1], float("1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_0 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_17, + full_18, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_87 = paddle._C_ops.add(full_1, uniform_0) + del uniform_0 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_0 = paddle._C_ops.floor(add_87) + del add_87 + + # pd_op.divide: (64x3136x96xf32) <- (64x3136x96xf32, xf32) + divide_0 = paddle._C_ops.divide(reshape_17, full_1) + + # pd_op.multiply: (64x3136x96xf32) <- (64x3136x96xf32, 64x1x1xf32) + multiply_0 = paddle._C_ops.multiply(divide_0, floor_0) + + # pd_op.add: (64x3136x96xf32) <- (64x3136x96xf32, 64x3136x96xf32) + add_11 = paddle._C_ops.add(add_6, multiply_0) + + # pd_op.layer_norm: (64x3136x96xf32, 64x3136xf32, 64x3136xf32) <- (64x3136x96xf32, 96xf32, 96xf32) + layer_norm_12, layer_norm_13, layer_norm_14 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_11, parameter_138, parameter_137, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_137, parameter_138 + + # pd_op.matmul: (64x3136x384xf32) <- (64x3136x96xf32, 96x384xf32) + matmul_8 = paddle._C_ops.matmul(layer_norm_12, parameter_136, False, False) + del parameter_136 + + # pd_op.add: (64x3136x384xf32) <- (64x3136x384xf32, 384xf32) + add_12 = paddle._C_ops.add(matmul_8, parameter_135) + del parameter_135 + + # pd_op.gelu: (64x3136x384xf32) <- (64x3136x384xf32) + gelu_1 = paddle._C_ops.gelu(add_12, False) + + # pd_op.matmul: (64x3136x96xf32) <- (64x3136x384xf32, 384x96xf32) + matmul_9 = paddle._C_ops.matmul(gelu_1, parameter_134, False, False) + del parameter_134 + + # pd_op.add: (64x3136x96xf32) <- (64x3136x96xf32, 96xf32) + add_13 = paddle._C_ops.add(matmul_9, parameter_133) + del parameter_133 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_1 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_17, + full_18, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_88 = paddle._C_ops.add(full_1, uniform_1) + del uniform_1 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_1 = paddle._C_ops.floor(add_88) + del add_88 + + # pd_op.divide: (64x3136x96xf32) <- (64x3136x96xf32, xf32) + divide_1 = paddle._C_ops.divide(add_13, full_1) + + # pd_op.multiply: (64x3136x96xf32) <- (64x3136x96xf32, 64x1x1xf32) + multiply_1 = paddle._C_ops.multiply(divide_1, floor_1) + + # pd_op.add: (64x3136x96xf32) <- (64x3136x96xf32, 64x3136x96xf32) + add_14 = paddle._C_ops.add(add_11, multiply_1) + + # pd_op.reshape: (64x56x56x96xf32) <- (64x3136x96xf32, 4xi64) + reshape_18 = paddle._C_ops.reshape(add_14, full_int_array_10) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_6 = [2, 2] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_125 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_122 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_119 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_116 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_52 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_49 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_46 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_43 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_20 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_17 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_15 = full_int_array_6 + + # pd_op.strided_slice: (64x28x28x96xf32) <- (64x56x56x96xf32, 2xi64, 2xi64, 2xi64) + strided_slice_0 = paddle._C_ops.strided_slice( + reshape_18, [1, 2], full_int_array_21, full_int_array_34, full_int_array_6 + ) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_7 = [1, 0] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_117 = full_int_array_7 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_44 = full_int_array_7 + + # pd_op.strided_slice: (64x28x28x96xf32) <- (64x56x56x96xf32, 2xi64, 2xi64, 2xi64) + strided_slice_1 = paddle._C_ops.strided_slice( + reshape_18, [1, 2], full_int_array_7, full_int_array_34, full_int_array_6 + ) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_8 = [0, 1] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_120 = full_int_array_8 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_47 = full_int_array_8 + + # pd_op.strided_slice: (64x28x28x96xf32) <- (64x56x56x96xf32, 2xi64, 2xi64, 2xi64) + strided_slice_2 = paddle._C_ops.strided_slice( + reshape_18, [1, 2], full_int_array_8, full_int_array_34, full_int_array_6 + ) + + # pd_op.strided_slice: (64x28x28x96xf32) <- (64x56x56x96xf32, 2xi64, 2xi64, 2xi64) + strided_slice_3 = paddle._C_ops.strided_slice( + reshape_18, [1, 2], full_int_array_23, full_int_array_34, full_int_array_6 + ) + + # pd_op.reshape: (64x56x56x96xf32) <- (64x56x56x96xf32, 4xi64) + reshape_121 = paddle._C_ops.reshape(reshape_18, full_int_array_10) + del full_int_array_10 + + # pd_op.full: (1xi32) <- () + full_2 = paddle._C_ops.full( + [1], float("-1"), paddle.int32, paddle.core.CPUPlace() + ) + + # pd_op.assign: (1xi32) <- (1xi32) + assign_126 = full_2 + + # pd_op.assign: (1xi32) <- (1xi32) + assign_53 = full_2 + + # builtin.combine: ([64x28x28x96xf32, 64x28x28x96xf32, 64x28x28x96xf32, 64x28x28x96xf32]) <- (64x28x28x96xf32, 64x28x28x96xf32, 64x28x28x96xf32, 64x28x28x96xf32) + combine_0 = [strided_slice_0, strided_slice_1, strided_slice_2, strided_slice_3] + + # pd_op.concat: (64x28x28x384xf32) <- ([64x28x28x96xf32, 64x28x28x96xf32, 64x28x28x96xf32, 64x28x28x96xf32], 1xi32) + concat_0 = paddle._C_ops.concat(combine_0, full_2) + del combine_0 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_41 = [64, -1, 384] + + # pd_op.reshape: (64x784x384xf32) <- (64x28x28x384xf32, 3xi64) + reshape_19 = paddle._C_ops.reshape(concat_0, full_int_array_41) + del full_int_array_41 + + # pd_op.layer_norm: (64x784x384xf32, 64x784xf32, 64x784xf32) <- (64x784x384xf32, 384xf32, 384xf32) + layer_norm_15, layer_norm_16, layer_norm_17 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + reshape_19, parameter_132, parameter_131, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_131, parameter_132 + + # pd_op.matmul: (64x784x192xf32) <- (64x784x384xf32, 384x192xf32) + matmul_10 = paddle._C_ops.matmul(layer_norm_15, parameter_130, False, False) + del parameter_130 + + # pd_op.layer_norm: (64x784x192xf32, 64x784xf32, 64x784xf32) <- (64x784x192xf32, 192xf32, 192xf32) + layer_norm_18, layer_norm_19, layer_norm_20 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + matmul_10, parameter_129, parameter_128, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_128, parameter_129 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_42 = [64, 28, 28, 192] + + # pd_op.reshape: (64x28x28x192xf32) <- (64x784x192xf32, 4xi64) + reshape_20 = paddle._C_ops.reshape(layer_norm_18, full_int_array_42) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_43 = [64, 4, 7, 4, 7, 192] + + # pd_op.reshape: (64x4x7x4x7x192xf32) <- (64x28x28x192xf32, 6xi64) + reshape_122 = paddle._C_ops.reshape(reshape_20, full_int_array_43) + + # pd_op.transpose: (64x4x4x7x7x192xf32) <- (64x4x7x4x7x192xf32) + transpose_13 = paddle._C_ops.transpose(reshape_122, [0, 1, 3, 2, 4, 5]) + del reshape_122 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_44 = [-1, 7, 7, 192] + + # pd_op.reshape: (1024x7x7x192xf32) <- (64x4x4x7x7x192xf32, 4xi64) + reshape_21 = paddle._C_ops.reshape(transpose_13, full_int_array_44) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_45 = [-1, 49, 192] + + # pd_op.reshape: (1024x49x192xf32) <- (1024x7x7x192xf32, 3xi64) + reshape_22 = paddle._C_ops.reshape(reshape_21, full_int_array_45) + + # pd_op.matmul: (1024x49x576xf32) <- (1024x49x192xf32, 192x576xf32) + matmul_11 = paddle._C_ops.matmul(reshape_22, parameter_127, False, False) + del parameter_127 + + # pd_op.add: (1024x49x576xf32) <- (1024x49x576xf32, 576xf32) + add_15 = paddle._C_ops.add(matmul_11, parameter_126) + del parameter_126 + + # pd_op.full_int_array: (5xi64) <- () + full_int_array_46 = [1024, 49, 3, 6, 32] + + # pd_op.reshape: (1024x49x3x6x32xf32) <- (1024x49x576xf32, 5xi64) + reshape_123 = paddle._C_ops.reshape(add_15, full_int_array_46) + + # pd_op.transpose: (3x1024x6x49x32xf32) <- (1024x49x3x6x32xf32) + transpose_14 = paddle._C_ops.transpose(reshape_123, [2, 0, 3, 1, 4]) + del reshape_123 + + # pd_op.slice: (1024x6x49x32xf32) <- (3x1024x6x49x32xf32, 1xi64, 1xi64) + slice_16 = paddle._C_ops.slice( + transpose_14, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (1024x6x49x32xf32) <- (3x1024x6x49x32xf32, 1xi64, 1xi64) + slice_17 = paddle._C_ops.slice( + transpose_14, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (1024x6x49x32xf32) <- (3x1024x6x49x32xf32, 1xi64, 1xi64) + slice_2 = paddle._C_ops.slice( + transpose_14, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (1024x6x49x32xf32) <- (1024x6x49x32xf32, 1xf32) + scale_2 = paddle._C_ops.scale(slice_16, full_0, float("0"), True) + del slice_16 + + # pd_op.transpose: (1024x6x32x49xf32) <- (1024x6x49x32xf32) + transpose_15 = paddle._C_ops.transpose(slice_17, [0, 1, 3, 2]) + del slice_17 + + # pd_op.matmul: (1024x6x49x49xf32) <- (1024x6x49x32xf32, 1024x6x32x49xf32) + matmul_12 = paddle._C_ops.matmul(scale_2, transpose_15, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_23 = paddle._C_ops.reshape(data_1, full_int_array_15) + del data_1 + + # pd_op.index_select: (2401x6xf32) <- (169x6xf32, 2401xi64) + index_select_2 = paddle._C_ops.index_select(data_2, reshape_23, 0) + del data_2 + + # pd_op.reshape: (49x49x6xf32) <- (2401x6xf32, 3xi64) + reshape_124 = paddle._C_ops.reshape(index_select_2, full_int_array_16) + + # pd_op.transpose: (6x49x49xf32) <- (49x49x6xf32) + transpose_16 = paddle._C_ops.transpose(reshape_124, [2, 0, 1]) + del reshape_124 + + # pd_op.unsqueeze: (1x6x49x49xf32) <- (6x49x49xf32, 1xi64) + unsqueeze_3 = paddle._C_ops.unsqueeze(transpose_16, full_int_array_0) + + # pd_op.add: (1024x6x49x49xf32) <- (1024x6x49x49xf32, 1x6x49x49xf32) + add_89 = paddle._C_ops.add(matmul_12, unsqueeze_3) + + # pd_op.softmax: (1024x6x49x49xf32) <- (1024x6x49x49xf32) + softmax_2 = paddle._C_ops.softmax(add_89, -1) + del add_89 + + # pd_op.matmul: (1024x6x49x32xf32) <- (1024x6x49x49xf32, 1024x6x49x32xf32) + matmul_66 = paddle._C_ops.matmul(softmax_2, slice_2, False, False) + + # pd_op.transpose: (1024x49x6x32xf32) <- (1024x6x49x32xf32) + transpose_17 = paddle._C_ops.transpose(matmul_66, [0, 2, 1, 3]) + del matmul_66 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_47 = [1024, 49, 192] + + # pd_op.reshape: (1024x49x192xf32) <- (1024x49x6x32xf32, 3xi64) + reshape_24 = paddle._C_ops.reshape(transpose_17, full_int_array_47) + + # pd_op.matmul: (1024x49x192xf32) <- (1024x49x192xf32, 192x192xf32) + matmul_13 = paddle._C_ops.matmul(reshape_24, parameter_125, False, False) + del parameter_125 + + # pd_op.add: (1024x49x192xf32) <- (1024x49x192xf32, 192xf32) + add_16 = paddle._C_ops.add(matmul_13, parameter_124) + del parameter_124 + + # pd_op.reshape: (1024x7x7x192xf32) <- (1024x49x192xf32, 4xi64) + reshape_25 = paddle._C_ops.reshape(add_16, full_int_array_44) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_48 = [-1, 4, 4, 7, 7, 192] + + # pd_op.reshape: (64x4x4x7x7x192xf32) <- (1024x7x7x192xf32, 6xi64) + reshape_125 = paddle._C_ops.reshape(reshape_25, full_int_array_48) + + # pd_op.transpose: (64x4x7x4x7x192xf32) <- (64x4x4x7x7x192xf32) + transpose_18 = paddle._C_ops.transpose(reshape_125, [0, 1, 3, 2, 4, 5]) + del reshape_125 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_49 = [-1, 28, 28, 192] + + # pd_op.reshape: (64x28x28x192xf32) <- (64x4x7x4x7x192xf32, 4xi64) + reshape_26 = paddle._C_ops.reshape(transpose_18, full_int_array_49) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_50 = [64, 784, 192] + + # pd_op.reshape: (64x784x192xf32) <- (64x28x28x192xf32, 3xi64) + reshape_27 = paddle._C_ops.reshape(reshape_26, full_int_array_50) + + # pd_op.full: (xf32) <- () + full_3 = paddle._C_ops.full( + [], + float("0.963636"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_29 = full_3 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_2 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_17, + full_18, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_90 = paddle._C_ops.add(full_3, uniform_2) + del uniform_2 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_2 = paddle._C_ops.floor(add_90) + del add_90 + + # pd_op.divide: (64x784x192xf32) <- (64x784x192xf32, xf32) + divide_2 = paddle._C_ops.divide(reshape_27, full_3) + + # pd_op.multiply: (64x784x192xf32) <- (64x784x192xf32, 64x1x1xf32) + multiply_2 = paddle._C_ops.multiply(divide_2, floor_2) + + # pd_op.add: (64x784x192xf32) <- (64x784x192xf32, 64x784x192xf32) + add_17 = paddle._C_ops.add(matmul_10, multiply_2) + + # pd_op.layer_norm: (64x784x192xf32, 64x784xf32, 64x784xf32) <- (64x784x192xf32, 192xf32, 192xf32) + layer_norm_21, layer_norm_22, layer_norm_23 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_17, parameter_123, parameter_122, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_122, parameter_123 + + # pd_op.matmul: (64x784x768xf32) <- (64x784x192xf32, 192x768xf32) + matmul_14 = paddle._C_ops.matmul(layer_norm_21, parameter_121, False, False) + del parameter_121 + + # pd_op.add: (64x784x768xf32) <- (64x784x768xf32, 768xf32) + add_18 = paddle._C_ops.add(matmul_14, parameter_120) + del parameter_120 + + # pd_op.gelu: (64x784x768xf32) <- (64x784x768xf32) + gelu_2 = paddle._C_ops.gelu(add_18, False) + + # pd_op.matmul: (64x784x192xf32) <- (64x784x768xf32, 768x192xf32) + matmul_15 = paddle._C_ops.matmul(gelu_2, parameter_119, False, False) + del parameter_119 + + # pd_op.add: (64x784x192xf32) <- (64x784x192xf32, 192xf32) + add_19 = paddle._C_ops.add(matmul_15, parameter_118) + del parameter_118 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_3 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_17, + full_18, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_91 = paddle._C_ops.add(full_3, uniform_3) + del uniform_3 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_3 = paddle._C_ops.floor(add_91) + del add_91 + + # pd_op.divide: (64x784x192xf32) <- (64x784x192xf32, xf32) + divide_3 = paddle._C_ops.divide(add_19, full_3) + + # pd_op.multiply: (64x784x192xf32) <- (64x784x192xf32, 64x1x1xf32) + multiply_3 = paddle._C_ops.multiply(divide_3, floor_3) + + # pd_op.add: (64x784x192xf32) <- (64x784x192xf32, 64x784x192xf32) + add_20 = paddle._C_ops.add(add_17, multiply_3) + + # pd_op.layer_norm: (64x784x192xf32, 64x784xf32, 64x784xf32) <- (64x784x192xf32, 192xf32, 192xf32) + layer_norm_24, layer_norm_25, layer_norm_26 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_20, parameter_117, parameter_116, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_116, parameter_117 + + # pd_op.reshape: (64x28x28x192xf32) <- (64x784x192xf32, 4xi64) + reshape_28 = paddle._C_ops.reshape(layer_norm_24, full_int_array_42) + + # pd_op.roll: (64x28x28x192xf32) <- (64x28x28x192xf32, 2xi64) + roll_2 = paddle._C_ops.roll(reshape_28, full_int_array_4, [1, 2]) + + # pd_op.reshape: (64x4x7x4x7x192xf32) <- (64x28x28x192xf32, 6xi64) + reshape_126 = paddle._C_ops.reshape(roll_2, full_int_array_43) + del full_int_array_43 + + # pd_op.transpose: (64x4x4x7x7x192xf32) <- (64x4x7x4x7x192xf32) + transpose_19 = paddle._C_ops.transpose(reshape_126, [0, 1, 3, 2, 4, 5]) + del reshape_126 + + # pd_op.reshape: (1024x7x7x192xf32) <- (64x4x4x7x7x192xf32, 4xi64) + reshape_29 = paddle._C_ops.reshape(transpose_19, full_int_array_44) + + # pd_op.reshape: (1024x49x192xf32) <- (1024x7x7x192xf32, 3xi64) + reshape_30 = paddle._C_ops.reshape(reshape_29, full_int_array_45) + del full_int_array_45 + + # pd_op.full: (1x28x28x1xf32) <- () + full_19 = paddle._C_ops.full( + [1, 28, 28, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__14 = paddle._C_ops.set_value_( + full_19, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_19 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__15 = paddle._C_ops.set_value_( + set_value__14, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__14 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__16 = paddle._C_ops.set_value_( + set_value__15, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__15 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__17 = paddle._C_ops.set_value_( + set_value__16, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__16 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__18 = paddle._C_ops.set_value_( + set_value__17, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__17 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__19 = paddle._C_ops.set_value_( + set_value__18, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__18 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__20 = paddle._C_ops.set_value_( + set_value__19, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__19 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__21 = paddle._C_ops.set_value_( + set_value__20, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__20 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__1 = paddle._C_ops.set_value_( + set_value__21, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__21 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_51 = [1, 4, 7, 4, 7, 1] + + # pd_op.reshape: (1x4x7x4x7x1xf32) <- (1x28x28x1xf32, 6xi64) + reshape_127 = paddle._C_ops.reshape(set_value__1, full_int_array_51) + del full_int_array_51 + + # pd_op.transpose: (1x4x4x7x7x1xf32) <- (1x4x7x4x7x1xf32) + transpose_75 = paddle._C_ops.transpose(reshape_127, [0, 1, 3, 2, 4, 5]) + del reshape_127 + + # pd_op.reshape: (16x7x7x1xf32) <- (1x4x4x7x7x1xf32, 4xi64) + reshape_128 = paddle._C_ops.reshape(transpose_75, full_int_array_36) + del transpose_75 + + # pd_op.reshape: (16x49xf32) <- (16x7x7x1xf32, 2xi64) + reshape_129 = paddle._C_ops.reshape(reshape_128, full_int_array_37) + del reshape_128 + + # pd_op.unsqueeze: (16x1x49xf32) <- (16x49xf32, 1xi64) + unsqueeze_22 = paddle._C_ops.unsqueeze(reshape_129, full_int_array_1) + + # pd_op.unsqueeze: (16x49x1xf32) <- (16x49xf32, 1xi64) + unsqueeze_23 = paddle._C_ops.unsqueeze(reshape_129, full_int_array_2) + del reshape_129 + + # pd_op.subtract: (16x49x49xf32) <- (16x1x49xf32, 16x49x1xf32) + subtract_1 = paddle._C_ops.subtract(unsqueeze_22, unsqueeze_23) + del unsqueeze_22, unsqueeze_23 + + # pd_op.not_equal: (16x49x49xb) <- (16x49x49xf32, xf32) + not_equal_1 = paddle._C_ops.not_equal(subtract_1, full_14) + + # pd_op.full: (16x49x49xf32) <- () + full_20 = paddle._C_ops.full( + [16, 49, 49], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (16x49x49xf32) <- (16x49x49xb, 16x49x49xf32, 16x49x49xf32) + where_2 = paddle._C_ops.where(not_equal_1, full_20, subtract_1) + del full_20, not_equal_1, subtract_1 + + # pd_op.equal: (16x49x49xb) <- (16x49x49xf32, xf32) + equal_1 = paddle._C_ops.equal(where_2, full_14) + + # pd_op.full: (16x49x49xf32) <- () + full_21 = paddle._C_ops.full( + [16, 49, 49], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (16x49x49xf32) <- (16x49x49xb, 16x49x49xf32, 16x49x49xf32) + where_3 = paddle._C_ops.where(equal_1, full_21, where_2) + del equal_1, full_21, where_2 + + # pd_op.matmul: (1024x49x576xf32) <- (1024x49x192xf32, 192x576xf32) + matmul_16 = paddle._C_ops.matmul(reshape_30, parameter_115, False, False) + del parameter_115 + + # pd_op.add: (1024x49x576xf32) <- (1024x49x576xf32, 576xf32) + add_21 = paddle._C_ops.add(matmul_16, parameter_114) + del parameter_114 + + # pd_op.reshape: (1024x49x3x6x32xf32) <- (1024x49x576xf32, 5xi64) + reshape_130 = paddle._C_ops.reshape(add_21, full_int_array_46) + del full_int_array_46 + + # pd_op.transpose: (3x1024x6x49x32xf32) <- (1024x49x3x6x32xf32) + transpose_20 = paddle._C_ops.transpose(reshape_130, [2, 0, 3, 1, 4]) + del reshape_130 + + # pd_op.slice: (1024x6x49x32xf32) <- (3x1024x6x49x32xf32, 1xi64, 1xi64) + slice_18 = paddle._C_ops.slice( + transpose_20, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (1024x6x49x32xf32) <- (3x1024x6x49x32xf32, 1xi64, 1xi64) + slice_19 = paddle._C_ops.slice( + transpose_20, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (1024x6x49x32xf32) <- (3x1024x6x49x32xf32, 1xi64, 1xi64) + slice_3 = paddle._C_ops.slice( + transpose_20, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (1024x6x49x32xf32) <- (1024x6x49x32xf32, 1xf32) + scale_3 = paddle._C_ops.scale(slice_18, full_0, float("0"), True) + del slice_18 + + # pd_op.transpose: (1024x6x32x49xf32) <- (1024x6x49x32xf32) + transpose_21 = paddle._C_ops.transpose(slice_19, [0, 1, 3, 2]) + del slice_19 + + # pd_op.matmul: (1024x6x49x49xf32) <- (1024x6x49x32xf32, 1024x6x32x49xf32) + matmul_17 = paddle._C_ops.matmul(scale_3, transpose_21, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_31 = paddle._C_ops.reshape(data_5, full_int_array_15) + del data_5 + + # pd_op.index_select: (2401x6xf32) <- (169x6xf32, 2401xi64) + index_select_3 = paddle._C_ops.index_select(data_6, reshape_31, 0) + del data_6 + + # pd_op.reshape: (49x49x6xf32) <- (2401x6xf32, 3xi64) + reshape_131 = paddle._C_ops.reshape(index_select_3, full_int_array_16) + + # pd_op.transpose: (6x49x49xf32) <- (49x49x6xf32) + transpose_22 = paddle._C_ops.transpose(reshape_131, [2, 0, 1]) + del reshape_131 + + # pd_op.unsqueeze: (1x6x49x49xf32) <- (6x49x49xf32, 1xi64) + unsqueeze_4 = paddle._C_ops.unsqueeze(transpose_22, full_int_array_0) + + # pd_op.add: (1024x6x49x49xf32) <- (1024x6x49x49xf32, 1x6x49x49xf32) + add_22 = paddle._C_ops.add(matmul_17, unsqueeze_4) + + # pd_op.full_int_array: (5xi64) <- () + full_int_array_52 = [64, 16, 6, 49, 49] + + # pd_op.reshape: (64x16x6x49x49xf32) <- (1024x6x49x49xf32, 5xi64) + reshape_32 = paddle._C_ops.reshape(add_22, full_int_array_52) + del full_int_array_52 + + # pd_op.unsqueeze: (16x1x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_24 = paddle._C_ops.unsqueeze(where_3, full_int_array_1) + del where_3 + + # pd_op.unsqueeze: (1x16x1x49x49xf32) <- (16x1x49x49xf32, 1xi64) + unsqueeze_5 = paddle._C_ops.unsqueeze(unsqueeze_24, full_int_array_0) + del unsqueeze_24 + + # pd_op.add: (64x16x6x49x49xf32) <- (64x16x6x49x49xf32, 1x16x1x49x49xf32) + add_23 = paddle._C_ops.add(reshape_32, unsqueeze_5) + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_53 = [1024, 6, 49, 49] + + # pd_op.reshape: (1024x6x49x49xf32) <- (64x16x6x49x49xf32, 4xi64) + reshape_132 = paddle._C_ops.reshape(add_23, full_int_array_53) + del full_int_array_53 + + # pd_op.softmax: (1024x6x49x49xf32) <- (1024x6x49x49xf32) + softmax_3 = paddle._C_ops.softmax(reshape_132, -1) + del reshape_132 + + # pd_op.matmul: (1024x6x49x32xf32) <- (1024x6x49x49xf32, 1024x6x49x32xf32) + matmul_67 = paddle._C_ops.matmul(softmax_3, slice_3, False, False) + + # pd_op.transpose: (1024x49x6x32xf32) <- (1024x6x49x32xf32) + transpose_23 = paddle._C_ops.transpose(matmul_67, [0, 2, 1, 3]) + del matmul_67 + + # pd_op.reshape: (1024x49x192xf32) <- (1024x49x6x32xf32, 3xi64) + reshape_33 = paddle._C_ops.reshape(transpose_23, full_int_array_47) + del full_int_array_47 + + # pd_op.matmul: (1024x49x192xf32) <- (1024x49x192xf32, 192x192xf32) + matmul_18 = paddle._C_ops.matmul(reshape_33, parameter_113, False, False) + del parameter_113 + + # pd_op.add: (1024x49x192xf32) <- (1024x49x192xf32, 192xf32) + add_24 = paddle._C_ops.add(matmul_18, parameter_112) + del parameter_112 + + # pd_op.reshape: (1024x7x7x192xf32) <- (1024x49x192xf32, 4xi64) + reshape_34 = paddle._C_ops.reshape(add_24, full_int_array_44) + del full_int_array_44 + + # pd_op.reshape: (64x4x4x7x7x192xf32) <- (1024x7x7x192xf32, 6xi64) + reshape_133 = paddle._C_ops.reshape(reshape_34, full_int_array_48) + del full_int_array_48 + + # pd_op.transpose: (64x4x7x4x7x192xf32) <- (64x4x4x7x7x192xf32) + transpose_24 = paddle._C_ops.transpose(reshape_133, [0, 1, 3, 2, 4, 5]) + del reshape_133 + + # pd_op.reshape: (64x28x28x192xf32) <- (64x4x7x4x7x192xf32, 4xi64) + reshape_35 = paddle._C_ops.reshape(transpose_24, full_int_array_49) + del full_int_array_49 + + # pd_op.roll: (64x28x28x192xf32) <- (64x28x28x192xf32, 2xi64) + roll_3 = paddle._C_ops.roll(reshape_35, full_int_array_5, [1, 2]) + + # pd_op.reshape: (64x784x192xf32) <- (64x28x28x192xf32, 3xi64) + reshape_36 = paddle._C_ops.reshape(roll_3, full_int_array_50) + del full_int_array_50 + + # pd_op.full: (xf32) <- () + full_4 = paddle._C_ops.full( + [], + float("0.945455"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_40 = full_4 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_4 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_17, + full_18, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_92 = paddle._C_ops.add(full_4, uniform_4) + del uniform_4 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_4 = paddle._C_ops.floor(add_92) + del add_92 + + # pd_op.divide: (64x784x192xf32) <- (64x784x192xf32, xf32) + divide_4 = paddle._C_ops.divide(reshape_36, full_4) + + # pd_op.multiply: (64x784x192xf32) <- (64x784x192xf32, 64x1x1xf32) + multiply_4 = paddle._C_ops.multiply(divide_4, floor_4) + + # pd_op.add: (64x784x192xf32) <- (64x784x192xf32, 64x784x192xf32) + add_25 = paddle._C_ops.add(add_20, multiply_4) + + # pd_op.layer_norm: (64x784x192xf32, 64x784xf32, 64x784xf32) <- (64x784x192xf32, 192xf32, 192xf32) + layer_norm_27, layer_norm_28, layer_norm_29 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_25, parameter_111, parameter_110, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_110, parameter_111 + + # pd_op.matmul: (64x784x768xf32) <- (64x784x192xf32, 192x768xf32) + matmul_19 = paddle._C_ops.matmul(layer_norm_27, parameter_109, False, False) + del parameter_109 + + # pd_op.add: (64x784x768xf32) <- (64x784x768xf32, 768xf32) + add_26 = paddle._C_ops.add(matmul_19, parameter_108) + del parameter_108 + + # pd_op.gelu: (64x784x768xf32) <- (64x784x768xf32) + gelu_3 = paddle._C_ops.gelu(add_26, False) + + # pd_op.matmul: (64x784x192xf32) <- (64x784x768xf32, 768x192xf32) + matmul_20 = paddle._C_ops.matmul(gelu_3, parameter_107, False, False) + del parameter_107 + + # pd_op.add: (64x784x192xf32) <- (64x784x192xf32, 192xf32) + add_27 = paddle._C_ops.add(matmul_20, parameter_106) + del parameter_106 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_5 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_17, + full_18, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_93 = paddle._C_ops.add(full_4, uniform_5) + del uniform_5 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_5 = paddle._C_ops.floor(add_93) + del add_93 + + # pd_op.divide: (64x784x192xf32) <- (64x784x192xf32, xf32) + divide_5 = paddle._C_ops.divide(add_27, full_4) + + # pd_op.multiply: (64x784x192xf32) <- (64x784x192xf32, 64x1x1xf32) + multiply_5 = paddle._C_ops.multiply(divide_5, floor_5) + + # pd_op.add: (64x784x192xf32) <- (64x784x192xf32, 64x784x192xf32) + add_28 = paddle._C_ops.add(add_25, multiply_5) + + # pd_op.reshape: (64x28x28x192xf32) <- (64x784x192xf32, 4xi64) + reshape_37 = paddle._C_ops.reshape(add_28, full_int_array_42) + + # pd_op.strided_slice: (64x14x14x192xf32) <- (64x28x28x192xf32, 2xi64, 2xi64, 2xi64) + strided_slice_4 = paddle._C_ops.strided_slice( + reshape_37, [1, 2], full_int_array_21, full_int_array_34, full_int_array_6 + ) + + # pd_op.strided_slice: (64x14x14x192xf32) <- (64x28x28x192xf32, 2xi64, 2xi64, 2xi64) + strided_slice_5 = paddle._C_ops.strided_slice( + reshape_37, [1, 2], full_int_array_7, full_int_array_34, full_int_array_6 + ) + + # pd_op.strided_slice: (64x14x14x192xf32) <- (64x28x28x192xf32, 2xi64, 2xi64, 2xi64) + strided_slice_6 = paddle._C_ops.strided_slice( + reshape_37, [1, 2], full_int_array_8, full_int_array_34, full_int_array_6 + ) + + # pd_op.strided_slice: (64x14x14x192xf32) <- (64x28x28x192xf32, 2xi64, 2xi64, 2xi64) + strided_slice_7 = paddle._C_ops.strided_slice( + reshape_37, [1, 2], full_int_array_23, full_int_array_34, full_int_array_6 + ) + + # pd_op.reshape: (64x28x28x192xf32) <- (64x28x28x192xf32, 4xi64) + reshape_134 = paddle._C_ops.reshape(reshape_37, full_int_array_42) + del full_int_array_42 + + # builtin.combine: ([64x14x14x192xf32, 64x14x14x192xf32, 64x14x14x192xf32, 64x14x14x192xf32]) <- (64x14x14x192xf32, 64x14x14x192xf32, 64x14x14x192xf32, 64x14x14x192xf32) + combine_1 = [strided_slice_4, strided_slice_5, strided_slice_6, strided_slice_7] + + # pd_op.concat: (64x14x14x768xf32) <- ([64x14x14x192xf32, 64x14x14x192xf32, 64x14x14x192xf32, 64x14x14x192xf32], 1xi32) + concat_1 = paddle._C_ops.concat(combine_1, full_2) + del combine_1 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_54 = [64, -1, 768] + + # pd_op.reshape: (64x196x768xf32) <- (64x14x14x768xf32, 3xi64) + reshape_38 = paddle._C_ops.reshape(concat_1, full_int_array_54) + del full_int_array_54 + + # pd_op.layer_norm: (64x196x768xf32, 64x196xf32, 64x196xf32) <- (64x196x768xf32, 768xf32, 768xf32) + layer_norm_30, layer_norm_31, layer_norm_32 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + reshape_38, parameter_105, parameter_104, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_104, parameter_105 + + # pd_op.matmul: (64x196x384xf32) <- (64x196x768xf32, 768x384xf32) + matmul_21 = paddle._C_ops.matmul(layer_norm_30, parameter_103, False, False) + del parameter_103 + + # pd_op.layer_norm: (64x196x384xf32, 64x196xf32, 64x196xf32) <- (64x196x384xf32, 384xf32, 384xf32) + layer_norm_33, layer_norm_34, layer_norm_35 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + matmul_21, parameter_102, parameter_101, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_101, parameter_102 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_55 = [64, 14, 14, 384] + + # pd_op.reshape: (64x14x14x384xf32) <- (64x196x384xf32, 4xi64) + reshape_39 = paddle._C_ops.reshape(layer_norm_33, full_int_array_55) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_56 = [64, 2, 7, 2, 7, 384] + + # pd_op.reshape: (64x2x7x2x7x384xf32) <- (64x14x14x384xf32, 6xi64) + reshape_135 = paddle._C_ops.reshape(reshape_39, full_int_array_56) + + # pd_op.transpose: (64x2x2x7x7x384xf32) <- (64x2x7x2x7x384xf32) + transpose_25 = paddle._C_ops.transpose(reshape_135, [0, 1, 3, 2, 4, 5]) + del reshape_135 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_57 = [-1, 7, 7, 384] + + # pd_op.reshape: (256x7x7x384xf32) <- (64x2x2x7x7x384xf32, 4xi64) + reshape_40 = paddle._C_ops.reshape(transpose_25, full_int_array_57) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_58 = [-1, 49, 384] + + # pd_op.reshape: (256x49x384xf32) <- (256x7x7x384xf32, 3xi64) + reshape_41 = paddle._C_ops.reshape(reshape_40, full_int_array_58) + + # pd_op.matmul: (256x49x1152xf32) <- (256x49x384xf32, 384x1152xf32) + matmul_22 = paddle._C_ops.matmul(reshape_41, parameter_100, False, False) + del parameter_100 + + # pd_op.add: (256x49x1152xf32) <- (256x49x1152xf32, 1152xf32) + add_29 = paddle._C_ops.add(matmul_22, parameter_99) + del parameter_99 + + # pd_op.full_int_array: (5xi64) <- () + full_int_array_59 = [256, 49, 3, 12, 32] + + # pd_op.reshape: (256x49x3x12x32xf32) <- (256x49x1152xf32, 5xi64) + reshape_136 = paddle._C_ops.reshape(add_29, full_int_array_59) + + # pd_op.transpose: (3x256x12x49x32xf32) <- (256x49x3x12x32xf32) + transpose_26 = paddle._C_ops.transpose(reshape_136, [2, 0, 3, 1, 4]) + del reshape_136 + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_20 = paddle._C_ops.slice( + transpose_26, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_21 = paddle._C_ops.slice( + transpose_26, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_4 = paddle._C_ops.slice( + transpose_26, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (256x12x49x32xf32) <- (256x12x49x32xf32, 1xf32) + scale_4 = paddle._C_ops.scale(slice_20, full_0, float("0"), True) + del slice_20 + + # pd_op.transpose: (256x12x32x49xf32) <- (256x12x49x32xf32) + transpose_27 = paddle._C_ops.transpose(slice_21, [0, 1, 3, 2]) + del slice_21 + + # pd_op.matmul: (256x12x49x49xf32) <- (256x12x49x32xf32, 256x12x32x49xf32) + matmul_23 = paddle._C_ops.matmul(scale_4, transpose_27, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_42 = paddle._C_ops.reshape(data_7, full_int_array_15) + del data_7 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_4 = paddle._C_ops.index_select(data_8, reshape_42, 0) + del data_8 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_137 = paddle._C_ops.reshape(index_select_4, full_int_array_16) + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_28 = paddle._C_ops.transpose(reshape_137, [2, 0, 1]) + del reshape_137 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_6 = paddle._C_ops.unsqueeze(transpose_28, full_int_array_0) + + # pd_op.add: (256x12x49x49xf32) <- (256x12x49x49xf32, 1x12x49x49xf32) + add_94 = paddle._C_ops.add(matmul_23, unsqueeze_6) + + # pd_op.softmax: (256x12x49x49xf32) <- (256x12x49x49xf32) + softmax_4 = paddle._C_ops.softmax(add_94, -1) + del add_94 + + # pd_op.matmul: (256x12x49x32xf32) <- (256x12x49x49xf32, 256x12x49x32xf32) + matmul_68 = paddle._C_ops.matmul(softmax_4, slice_4, False, False) + + # pd_op.transpose: (256x49x12x32xf32) <- (256x12x49x32xf32) + transpose_29 = paddle._C_ops.transpose(matmul_68, [0, 2, 1, 3]) + del matmul_68 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_60 = [256, 49, 384] + + # pd_op.reshape: (256x49x384xf32) <- (256x49x12x32xf32, 3xi64) + reshape_43 = paddle._C_ops.reshape(transpose_29, full_int_array_60) + + # pd_op.matmul: (256x49x384xf32) <- (256x49x384xf32, 384x384xf32) + matmul_24 = paddle._C_ops.matmul(reshape_43, parameter_98, False, False) + del parameter_98 + + # pd_op.add: (256x49x384xf32) <- (256x49x384xf32, 384xf32) + add_30 = paddle._C_ops.add(matmul_24, parameter_97) + del parameter_97 + + # pd_op.reshape: (256x7x7x384xf32) <- (256x49x384xf32, 4xi64) + reshape_44 = paddle._C_ops.reshape(add_30, full_int_array_57) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_61 = [-1, 2, 2, 7, 7, 384] + + # pd_op.reshape: (64x2x2x7x7x384xf32) <- (256x7x7x384xf32, 6xi64) + reshape_138 = paddle._C_ops.reshape(reshape_44, full_int_array_61) + + # pd_op.transpose: (64x2x7x2x7x384xf32) <- (64x2x2x7x7x384xf32) + transpose_30 = paddle._C_ops.transpose(reshape_138, [0, 1, 3, 2, 4, 5]) + del reshape_138 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_62 = [-1, 14, 14, 384] + + # pd_op.reshape: (64x14x14x384xf32) <- (64x2x7x2x7x384xf32, 4xi64) + reshape_45 = paddle._C_ops.reshape(transpose_30, full_int_array_62) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_63 = [64, 196, 384] + + # pd_op.reshape: (64x196x384xf32) <- (64x14x14x384xf32, 3xi64) + reshape_46 = paddle._C_ops.reshape(reshape_45, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_5 = paddle._C_ops.full( + [], + float("0.927273"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_62 = full_5 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_6 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_17, + full_18, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_95 = paddle._C_ops.add(full_5, uniform_6) + del uniform_6 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_6 = paddle._C_ops.floor(add_95) + del add_95 + + # pd_op.divide: (64x196x384xf32) <- (64x196x384xf32, xf32) + divide_6 = paddle._C_ops.divide(reshape_46, full_5) + + # pd_op.multiply: (64x196x384xf32) <- (64x196x384xf32, 64x1x1xf32) + multiply_6 = paddle._C_ops.multiply(divide_6, floor_6) + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 64x196x384xf32) + add_31 = paddle._C_ops.add(matmul_21, multiply_6) + + # pd_op.layer_norm: (64x196x384xf32, 64x196xf32, 64x196xf32) <- (64x196x384xf32, 384xf32, 384xf32) + layer_norm_36, layer_norm_37, layer_norm_38 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_31, parameter_96, parameter_95, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_95, parameter_96 + + # pd_op.matmul: (64x196x1536xf32) <- (64x196x384xf32, 384x1536xf32) + matmul_25 = paddle._C_ops.matmul(layer_norm_36, parameter_94, False, False) + del parameter_94 + + # pd_op.add: (64x196x1536xf32) <- (64x196x1536xf32, 1536xf32) + add_32 = paddle._C_ops.add(matmul_25, parameter_93) + del parameter_93 + + # pd_op.gelu: (64x196x1536xf32) <- (64x196x1536xf32) + gelu_4 = paddle._C_ops.gelu(add_32, False) + + # pd_op.matmul: (64x196x384xf32) <- (64x196x1536xf32, 1536x384xf32) + matmul_26 = paddle._C_ops.matmul(gelu_4, parameter_92, False, False) + del parameter_92 + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 384xf32) + add_33 = paddle._C_ops.add(matmul_26, parameter_91) + del parameter_91 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_7 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_17, + full_18, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_96 = paddle._C_ops.add(full_5, uniform_7) + del uniform_7 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_7 = paddle._C_ops.floor(add_96) + del add_96 + + # pd_op.divide: (64x196x384xf32) <- (64x196x384xf32, xf32) + divide_7 = paddle._C_ops.divide(add_33, full_5) + + # pd_op.multiply: (64x196x384xf32) <- (64x196x384xf32, 64x1x1xf32) + multiply_7 = paddle._C_ops.multiply(divide_7, floor_7) + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 64x196x384xf32) + add_34 = paddle._C_ops.add(add_31, multiply_7) + + # pd_op.layer_norm: (64x196x384xf32, 64x196xf32, 64x196xf32) <- (64x196x384xf32, 384xf32, 384xf32) + layer_norm_39, layer_norm_40, layer_norm_41 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_34, parameter_90, parameter_89, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_89, parameter_90 + + # pd_op.reshape: (64x14x14x384xf32) <- (64x196x384xf32, 4xi64) + reshape_47 = paddle._C_ops.reshape(layer_norm_39, full_int_array_55) + + # pd_op.roll: (64x14x14x384xf32) <- (64x14x14x384xf32, 2xi64) + roll_4 = paddle._C_ops.roll(reshape_47, full_int_array_4, [1, 2]) + + # pd_op.reshape: (64x2x7x2x7x384xf32) <- (64x14x14x384xf32, 6xi64) + reshape_139 = paddle._C_ops.reshape(roll_4, full_int_array_56) + + # pd_op.transpose: (64x2x2x7x7x384xf32) <- (64x2x7x2x7x384xf32) + transpose_31 = paddle._C_ops.transpose(reshape_139, [0, 1, 3, 2, 4, 5]) + del reshape_139 + + # pd_op.reshape: (256x7x7x384xf32) <- (64x2x2x7x7x384xf32, 4xi64) + reshape_48 = paddle._C_ops.reshape(transpose_31, full_int_array_57) + + # pd_op.reshape: (256x49x384xf32) <- (256x7x7x384xf32, 3xi64) + reshape_49 = paddle._C_ops.reshape(reshape_48, full_int_array_58) + + # pd_op.full: (1x14x14x1xf32) <- () + full_22 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__22 = paddle._C_ops.set_value_( + full_22, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_22 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__23 = paddle._C_ops.set_value_( + set_value__22, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__22 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__24 = paddle._C_ops.set_value_( + set_value__23, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__23 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__25 = paddle._C_ops.set_value_( + set_value__24, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__24 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__26 = paddle._C_ops.set_value_( + set_value__25, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__25 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__27 = paddle._C_ops.set_value_( + set_value__26, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__26 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__28 = paddle._C_ops.set_value_( + set_value__27, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__27 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__29 = paddle._C_ops.set_value_( + set_value__28, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__28 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__2 = paddle._C_ops.set_value_( + set_value__29, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__29 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_64 = [1, 2, 7, 2, 7, 1] + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_140 = paddle._C_ops.reshape(set_value__2, full_int_array_64) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_76 = paddle._C_ops.transpose(reshape_140, [0, 1, 3, 2, 4, 5]) + del reshape_140 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_141 = paddle._C_ops.reshape(transpose_76, full_int_array_36) + del transpose_76 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_142 = paddle._C_ops.reshape(reshape_141, full_int_array_37) + del reshape_141 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_25 = paddle._C_ops.unsqueeze(reshape_142, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_26 = paddle._C_ops.unsqueeze(reshape_142, full_int_array_2) + del reshape_142 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_2 = paddle._C_ops.subtract(unsqueeze_25, unsqueeze_26) + del unsqueeze_25, unsqueeze_26 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_2 = paddle._C_ops.not_equal(subtract_2, full_14) + + # pd_op.full: (4x49x49xf32) <- () + full_23 = paddle._C_ops.full( + [4, 49, 49], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_4 = paddle._C_ops.where(not_equal_2, full_23, subtract_2) + del not_equal_2, subtract_2 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_2 = paddle._C_ops.equal(where_4, full_14) + + # pd_op.full: (4x49x49xf32) <- () + full_24 = paddle._C_ops.full( + [4, 49, 49], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_5 = paddle._C_ops.where(equal_2, full_24, where_4) + del equal_2, where_4 + + # pd_op.matmul: (256x49x1152xf32) <- (256x49x384xf32, 384x1152xf32) + matmul_27 = paddle._C_ops.matmul(reshape_49, parameter_88, False, False) + del parameter_88 + + # pd_op.add: (256x49x1152xf32) <- (256x49x1152xf32, 1152xf32) + add_35 = paddle._C_ops.add(matmul_27, parameter_87) + del parameter_87 + + # pd_op.reshape: (256x49x3x12x32xf32) <- (256x49x1152xf32, 5xi64) + reshape_143 = paddle._C_ops.reshape(add_35, full_int_array_59) + + # pd_op.transpose: (3x256x12x49x32xf32) <- (256x49x3x12x32xf32) + transpose_32 = paddle._C_ops.transpose(reshape_143, [2, 0, 3, 1, 4]) + del reshape_143 + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_22 = paddle._C_ops.slice( + transpose_32, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_23 = paddle._C_ops.slice( + transpose_32, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_5 = paddle._C_ops.slice( + transpose_32, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (256x12x49x32xf32) <- (256x12x49x32xf32, 1xf32) + scale_5 = paddle._C_ops.scale(slice_22, full_0, float("0"), True) + del slice_22 + + # pd_op.transpose: (256x12x32x49xf32) <- (256x12x49x32xf32) + transpose_33 = paddle._C_ops.transpose(slice_23, [0, 1, 3, 2]) + del slice_23 + + # pd_op.matmul: (256x12x49x49xf32) <- (256x12x49x32xf32, 256x12x32x49xf32) + matmul_28 = paddle._C_ops.matmul(scale_5, transpose_33, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_50 = paddle._C_ops.reshape(data_9, full_int_array_15) + del data_9 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_5 = paddle._C_ops.index_select(data_10, reshape_50, 0) + del data_10 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_144 = paddle._C_ops.reshape(index_select_5, full_int_array_16) + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_34 = paddle._C_ops.transpose(reshape_144, [2, 0, 1]) + del reshape_144 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_7 = paddle._C_ops.unsqueeze(transpose_34, full_int_array_0) + + # pd_op.add: (256x12x49x49xf32) <- (256x12x49x49xf32, 1x12x49x49xf32) + add_36 = paddle._C_ops.add(matmul_28, unsqueeze_7) + + # pd_op.full_int_array: (5xi64) <- () + full_int_array_65 = [64, 4, 12, 49, 49] + + # pd_op.reshape: (64x4x12x49x49xf32) <- (256x12x49x49xf32, 5xi64) + reshape_51 = paddle._C_ops.reshape(add_36, full_int_array_65) + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_27 = paddle._C_ops.unsqueeze(where_5, full_int_array_1) + del where_5 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_8 = paddle._C_ops.unsqueeze(unsqueeze_27, full_int_array_0) + del unsqueeze_27 + + # pd_op.add: (64x4x12x49x49xf32) <- (64x4x12x49x49xf32, 1x4x1x49x49xf32) + add_37 = paddle._C_ops.add(reshape_51, unsqueeze_8) + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_66 = [256, 12, 49, 49] + + # pd_op.reshape: (256x12x49x49xf32) <- (64x4x12x49x49xf32, 4xi64) + reshape_145 = paddle._C_ops.reshape(add_37, full_int_array_66) + + # pd_op.softmax: (256x12x49x49xf32) <- (256x12x49x49xf32) + softmax_5 = paddle._C_ops.softmax(reshape_145, -1) + del reshape_145 + + # pd_op.matmul: (256x12x49x32xf32) <- (256x12x49x49xf32, 256x12x49x32xf32) + matmul_69 = paddle._C_ops.matmul(softmax_5, slice_5, False, False) + + # pd_op.transpose: (256x49x12x32xf32) <- (256x12x49x32xf32) + transpose_35 = paddle._C_ops.transpose(matmul_69, [0, 2, 1, 3]) + del matmul_69 + + # pd_op.reshape: (256x49x384xf32) <- (256x49x12x32xf32, 3xi64) + reshape_52 = paddle._C_ops.reshape(transpose_35, full_int_array_60) + + # pd_op.matmul: (256x49x384xf32) <- (256x49x384xf32, 384x384xf32) + matmul_29 = paddle._C_ops.matmul(reshape_52, parameter_86, False, False) + del parameter_86 + + # pd_op.add: (256x49x384xf32) <- (256x49x384xf32, 384xf32) + add_38 = paddle._C_ops.add(matmul_29, parameter_85) + del parameter_85 + + # pd_op.reshape: (256x7x7x384xf32) <- (256x49x384xf32, 4xi64) + reshape_53 = paddle._C_ops.reshape(add_38, full_int_array_57) + + # pd_op.reshape: (64x2x2x7x7x384xf32) <- (256x7x7x384xf32, 6xi64) + reshape_146 = paddle._C_ops.reshape(reshape_53, full_int_array_61) + + # pd_op.transpose: (64x2x7x2x7x384xf32) <- (64x2x2x7x7x384xf32) + transpose_36 = paddle._C_ops.transpose(reshape_146, [0, 1, 3, 2, 4, 5]) + del reshape_146 + + # pd_op.reshape: (64x14x14x384xf32) <- (64x2x7x2x7x384xf32, 4xi64) + reshape_54 = paddle._C_ops.reshape(transpose_36, full_int_array_62) + + # pd_op.roll: (64x14x14x384xf32) <- (64x14x14x384xf32, 2xi64) + roll_5 = paddle._C_ops.roll(reshape_54, full_int_array_5, [1, 2]) + + # pd_op.reshape: (64x196x384xf32) <- (64x14x14x384xf32, 3xi64) + reshape_55 = paddle._C_ops.reshape(roll_5, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_6 = paddle._C_ops.full( + [], + float("0.909091"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_73 = full_6 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_8 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_17, + full_18, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_97 = paddle._C_ops.add(full_6, uniform_8) + del uniform_8 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_8 = paddle._C_ops.floor(add_97) + del add_97 + + # pd_op.divide: (64x196x384xf32) <- (64x196x384xf32, xf32) + divide_8 = paddle._C_ops.divide(reshape_55, full_6) + + # pd_op.multiply: (64x196x384xf32) <- (64x196x384xf32, 64x1x1xf32) + multiply_8 = paddle._C_ops.multiply(divide_8, floor_8) + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 64x196x384xf32) + add_39 = paddle._C_ops.add(add_34, multiply_8) + + # pd_op.layer_norm: (64x196x384xf32, 64x196xf32, 64x196xf32) <- (64x196x384xf32, 384xf32, 384xf32) + layer_norm_42, layer_norm_43, layer_norm_44 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_39, parameter_84, parameter_83, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_83, parameter_84 + + # pd_op.matmul: (64x196x1536xf32) <- (64x196x384xf32, 384x1536xf32) + matmul_30 = paddle._C_ops.matmul(layer_norm_42, parameter_82, False, False) + del parameter_82 + + # pd_op.add: (64x196x1536xf32) <- (64x196x1536xf32, 1536xf32) + add_40 = paddle._C_ops.add(matmul_30, parameter_81) + del parameter_81 + + # pd_op.gelu: (64x196x1536xf32) <- (64x196x1536xf32) + gelu_5 = paddle._C_ops.gelu(add_40, False) + + # pd_op.matmul: (64x196x384xf32) <- (64x196x1536xf32, 1536x384xf32) + matmul_31 = paddle._C_ops.matmul(gelu_5, parameter_80, False, False) + del parameter_80 + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 384xf32) + add_41 = paddle._C_ops.add(matmul_31, parameter_79) + del parameter_79 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_9 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_17, + full_18, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_98 = paddle._C_ops.add(full_6, uniform_9) + del uniform_9 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_9 = paddle._C_ops.floor(add_98) + del add_98 + + # pd_op.divide: (64x196x384xf32) <- (64x196x384xf32, xf32) + divide_9 = paddle._C_ops.divide(add_41, full_6) + + # pd_op.multiply: (64x196x384xf32) <- (64x196x384xf32, 64x1x1xf32) + multiply_9 = paddle._C_ops.multiply(divide_9, floor_9) + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 64x196x384xf32) + add_42 = paddle._C_ops.add(add_39, multiply_9) + + # pd_op.layer_norm: (64x196x384xf32, 64x196xf32, 64x196xf32) <- (64x196x384xf32, 384xf32, 384xf32) + layer_norm_45, layer_norm_46, layer_norm_47 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_42, parameter_78, parameter_77, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_77, parameter_78 + + # pd_op.reshape: (64x14x14x384xf32) <- (64x196x384xf32, 4xi64) + reshape_56 = paddle._C_ops.reshape(layer_norm_45, full_int_array_55) + + # pd_op.reshape: (64x2x7x2x7x384xf32) <- (64x14x14x384xf32, 6xi64) + reshape_147 = paddle._C_ops.reshape(reshape_56, full_int_array_56) + + # pd_op.transpose: (64x2x2x7x7x384xf32) <- (64x2x7x2x7x384xf32) + transpose_37 = paddle._C_ops.transpose(reshape_147, [0, 1, 3, 2, 4, 5]) + del reshape_147 + + # pd_op.reshape: (256x7x7x384xf32) <- (64x2x2x7x7x384xf32, 4xi64) + reshape_57 = paddle._C_ops.reshape(transpose_37, full_int_array_57) + + # pd_op.reshape: (256x49x384xf32) <- (256x7x7x384xf32, 3xi64) + reshape_58 = paddle._C_ops.reshape(reshape_57, full_int_array_58) + + # pd_op.matmul: (256x49x1152xf32) <- (256x49x384xf32, 384x1152xf32) + matmul_32 = paddle._C_ops.matmul(reshape_58, parameter_76, False, False) + del parameter_76 + + # pd_op.add: (256x49x1152xf32) <- (256x49x1152xf32, 1152xf32) + add_43 = paddle._C_ops.add(matmul_32, parameter_75) + del parameter_75 + + # pd_op.reshape: (256x49x3x12x32xf32) <- (256x49x1152xf32, 5xi64) + reshape_148 = paddle._C_ops.reshape(add_43, full_int_array_59) + + # pd_op.transpose: (3x256x12x49x32xf32) <- (256x49x3x12x32xf32) + transpose_38 = paddle._C_ops.transpose(reshape_148, [2, 0, 3, 1, 4]) + del reshape_148 + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_24 = paddle._C_ops.slice( + transpose_38, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_25 = paddle._C_ops.slice( + transpose_38, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_6 = paddle._C_ops.slice( + transpose_38, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (256x12x49x32xf32) <- (256x12x49x32xf32, 1xf32) + scale_6 = paddle._C_ops.scale(slice_24, full_0, float("0"), True) + del slice_24 + + # pd_op.transpose: (256x12x32x49xf32) <- (256x12x49x32xf32) + transpose_39 = paddle._C_ops.transpose(slice_25, [0, 1, 3, 2]) + del slice_25 + + # pd_op.matmul: (256x12x49x49xf32) <- (256x12x49x32xf32, 256x12x32x49xf32) + matmul_33 = paddle._C_ops.matmul(scale_6, transpose_39, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_59 = paddle._C_ops.reshape(data_11, full_int_array_15) + del data_11 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_6 = paddle._C_ops.index_select(data_12, reshape_59, 0) + del data_12 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_149 = paddle._C_ops.reshape(index_select_6, full_int_array_16) + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_40 = paddle._C_ops.transpose(reshape_149, [2, 0, 1]) + del reshape_149 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_9 = paddle._C_ops.unsqueeze(transpose_40, full_int_array_0) + + # pd_op.add: (256x12x49x49xf32) <- (256x12x49x49xf32, 1x12x49x49xf32) + add_99 = paddle._C_ops.add(matmul_33, unsqueeze_9) + + # pd_op.softmax: (256x12x49x49xf32) <- (256x12x49x49xf32) + softmax_6 = paddle._C_ops.softmax(add_99, -1) + del add_99 + + # pd_op.matmul: (256x12x49x32xf32) <- (256x12x49x49xf32, 256x12x49x32xf32) + matmul_70 = paddle._C_ops.matmul(softmax_6, slice_6, False, False) + + # pd_op.transpose: (256x49x12x32xf32) <- (256x12x49x32xf32) + transpose_41 = paddle._C_ops.transpose(matmul_70, [0, 2, 1, 3]) + del matmul_70 + + # pd_op.reshape: (256x49x384xf32) <- (256x49x12x32xf32, 3xi64) + reshape_60 = paddle._C_ops.reshape(transpose_41, full_int_array_60) + + # pd_op.matmul: (256x49x384xf32) <- (256x49x384xf32, 384x384xf32) + matmul_34 = paddle._C_ops.matmul(reshape_60, parameter_74, False, False) + del parameter_74 + + # pd_op.add: (256x49x384xf32) <- (256x49x384xf32, 384xf32) + add_44 = paddle._C_ops.add(matmul_34, parameter_73) + del parameter_73 + + # pd_op.reshape: (256x7x7x384xf32) <- (256x49x384xf32, 4xi64) + reshape_61 = paddle._C_ops.reshape(add_44, full_int_array_57) + + # pd_op.reshape: (64x2x2x7x7x384xf32) <- (256x7x7x384xf32, 6xi64) + reshape_150 = paddle._C_ops.reshape(reshape_61, full_int_array_61) + + # pd_op.transpose: (64x2x7x2x7x384xf32) <- (64x2x2x7x7x384xf32) + transpose_42 = paddle._C_ops.transpose(reshape_150, [0, 1, 3, 2, 4, 5]) + del reshape_150 + + # pd_op.reshape: (64x14x14x384xf32) <- (64x2x7x2x7x384xf32, 4xi64) + reshape_62 = paddle._C_ops.reshape(transpose_42, full_int_array_62) + + # pd_op.reshape: (64x196x384xf32) <- (64x14x14x384xf32, 3xi64) + reshape_63 = paddle._C_ops.reshape(reshape_62, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_7 = paddle._C_ops.full( + [], + float("0.890909"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_82 = full_7 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_10 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_17, + full_18, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_100 = paddle._C_ops.add(full_7, uniform_10) + del uniform_10 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_10 = paddle._C_ops.floor(add_100) + del add_100 + + # pd_op.divide: (64x196x384xf32) <- (64x196x384xf32, xf32) + divide_10 = paddle._C_ops.divide(reshape_63, full_7) + + # pd_op.multiply: (64x196x384xf32) <- (64x196x384xf32, 64x1x1xf32) + multiply_10 = paddle._C_ops.multiply(divide_10, floor_10) + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 64x196x384xf32) + add_45 = paddle._C_ops.add(add_42, multiply_10) + + # pd_op.layer_norm: (64x196x384xf32, 64x196xf32, 64x196xf32) <- (64x196x384xf32, 384xf32, 384xf32) + layer_norm_48, layer_norm_49, layer_norm_50 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_45, parameter_72, parameter_71, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_71, parameter_72 + + # pd_op.matmul: (64x196x1536xf32) <- (64x196x384xf32, 384x1536xf32) + matmul_35 = paddle._C_ops.matmul(layer_norm_48, parameter_70, False, False) + del parameter_70 + + # pd_op.add: (64x196x1536xf32) <- (64x196x1536xf32, 1536xf32) + add_46 = paddle._C_ops.add(matmul_35, parameter_69) + del parameter_69 + + # pd_op.gelu: (64x196x1536xf32) <- (64x196x1536xf32) + gelu_6 = paddle._C_ops.gelu(add_46, False) + + # pd_op.matmul: (64x196x384xf32) <- (64x196x1536xf32, 1536x384xf32) + matmul_36 = paddle._C_ops.matmul(gelu_6, parameter_68, False, False) + del parameter_68 + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 384xf32) + add_47 = paddle._C_ops.add(matmul_36, parameter_67) + del parameter_67 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_11 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_17, + full_18, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_101 = paddle._C_ops.add(full_7, uniform_11) + del uniform_11 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_11 = paddle._C_ops.floor(add_101) + del add_101 + + # pd_op.divide: (64x196x384xf32) <- (64x196x384xf32, xf32) + divide_11 = paddle._C_ops.divide(add_47, full_7) + + # pd_op.multiply: (64x196x384xf32) <- (64x196x384xf32, 64x1x1xf32) + multiply_11 = paddle._C_ops.multiply(divide_11, floor_11) + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 64x196x384xf32) + add_48 = paddle._C_ops.add(add_45, multiply_11) + + # pd_op.layer_norm: (64x196x384xf32, 64x196xf32, 64x196xf32) <- (64x196x384xf32, 384xf32, 384xf32) + layer_norm_51, layer_norm_52, layer_norm_53 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_48, parameter_66, parameter_65, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_65, parameter_66 + + # pd_op.reshape: (64x14x14x384xf32) <- (64x196x384xf32, 4xi64) + reshape_64 = paddle._C_ops.reshape(layer_norm_51, full_int_array_55) + + # pd_op.roll: (64x14x14x384xf32) <- (64x14x14x384xf32, 2xi64) + roll_6 = paddle._C_ops.roll(reshape_64, full_int_array_4, [1, 2]) + + # pd_op.reshape: (64x2x7x2x7x384xf32) <- (64x14x14x384xf32, 6xi64) + reshape_151 = paddle._C_ops.reshape(roll_6, full_int_array_56) + + # pd_op.transpose: (64x2x2x7x7x384xf32) <- (64x2x7x2x7x384xf32) + transpose_43 = paddle._C_ops.transpose(reshape_151, [0, 1, 3, 2, 4, 5]) + del reshape_151 + + # pd_op.reshape: (256x7x7x384xf32) <- (64x2x2x7x7x384xf32, 4xi64) + reshape_65 = paddle._C_ops.reshape(transpose_43, full_int_array_57) + + # pd_op.reshape: (256x49x384xf32) <- (256x7x7x384xf32, 3xi64) + reshape_66 = paddle._C_ops.reshape(reshape_65, full_int_array_58) + + # pd_op.full: (1x14x14x1xf32) <- () + full_25 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__30 = paddle._C_ops.set_value_( + full_25, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_25 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__31 = paddle._C_ops.set_value_( + set_value__30, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__30 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__32 = paddle._C_ops.set_value_( + set_value__31, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__31 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__33 = paddle._C_ops.set_value_( + set_value__32, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__32 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__34 = paddle._C_ops.set_value_( + set_value__33, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__33 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__35 = paddle._C_ops.set_value_( + set_value__34, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__34 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__36 = paddle._C_ops.set_value_( + set_value__35, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__35 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__37 = paddle._C_ops.set_value_( + set_value__36, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__36 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__3 = paddle._C_ops.set_value_( + set_value__37, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__37 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_152 = paddle._C_ops.reshape(set_value__3, full_int_array_64) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_77 = paddle._C_ops.transpose(reshape_152, [0, 1, 3, 2, 4, 5]) + del reshape_152 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_153 = paddle._C_ops.reshape(transpose_77, full_int_array_36) + del transpose_77 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_154 = paddle._C_ops.reshape(reshape_153, full_int_array_37) + del reshape_153 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_28 = paddle._C_ops.unsqueeze(reshape_154, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_29 = paddle._C_ops.unsqueeze(reshape_154, full_int_array_2) + del reshape_154 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_3 = paddle._C_ops.subtract(unsqueeze_28, unsqueeze_29) + del unsqueeze_28, unsqueeze_29 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_3 = paddle._C_ops.not_equal(subtract_3, full_14) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_6 = paddle._C_ops.where(not_equal_3, full_23, subtract_3) + del not_equal_3, subtract_3 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_3 = paddle._C_ops.equal(where_6, full_14) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_7 = paddle._C_ops.where(equal_3, full_24, where_6) + del equal_3, where_6 + + # pd_op.matmul: (256x49x1152xf32) <- (256x49x384xf32, 384x1152xf32) + matmul_37 = paddle._C_ops.matmul(reshape_66, parameter_64, False, False) + del parameter_64 + + # pd_op.add: (256x49x1152xf32) <- (256x49x1152xf32, 1152xf32) + add_49 = paddle._C_ops.add(matmul_37, parameter_63) + del parameter_63 + + # pd_op.reshape: (256x49x3x12x32xf32) <- (256x49x1152xf32, 5xi64) + reshape_155 = paddle._C_ops.reshape(add_49, full_int_array_59) + + # pd_op.transpose: (3x256x12x49x32xf32) <- (256x49x3x12x32xf32) + transpose_44 = paddle._C_ops.transpose(reshape_155, [2, 0, 3, 1, 4]) + del reshape_155 + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_26 = paddle._C_ops.slice( + transpose_44, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_27 = paddle._C_ops.slice( + transpose_44, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_7 = paddle._C_ops.slice( + transpose_44, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (256x12x49x32xf32) <- (256x12x49x32xf32, 1xf32) + scale_7 = paddle._C_ops.scale(slice_26, full_0, float("0"), True) + del slice_26 + + # pd_op.transpose: (256x12x32x49xf32) <- (256x12x49x32xf32) + transpose_45 = paddle._C_ops.transpose(slice_27, [0, 1, 3, 2]) + del slice_27 + + # pd_op.matmul: (256x12x49x49xf32) <- (256x12x49x32xf32, 256x12x32x49xf32) + matmul_38 = paddle._C_ops.matmul(scale_7, transpose_45, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_67 = paddle._C_ops.reshape(data_13, full_int_array_15) + del data_13 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_7 = paddle._C_ops.index_select(data_14, reshape_67, 0) + del data_14 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_156 = paddle._C_ops.reshape(index_select_7, full_int_array_16) + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_46 = paddle._C_ops.transpose(reshape_156, [2, 0, 1]) + del reshape_156 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_10 = paddle._C_ops.unsqueeze(transpose_46, full_int_array_0) + + # pd_op.add: (256x12x49x49xf32) <- (256x12x49x49xf32, 1x12x49x49xf32) + add_50 = paddle._C_ops.add(matmul_38, unsqueeze_10) + + # pd_op.reshape: (64x4x12x49x49xf32) <- (256x12x49x49xf32, 5xi64) + reshape_68 = paddle._C_ops.reshape(add_50, full_int_array_65) + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_30 = paddle._C_ops.unsqueeze(where_7, full_int_array_1) + del where_7 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_11 = paddle._C_ops.unsqueeze(unsqueeze_30, full_int_array_0) + del unsqueeze_30 + + # pd_op.add: (64x4x12x49x49xf32) <- (64x4x12x49x49xf32, 1x4x1x49x49xf32) + add_51 = paddle._C_ops.add(reshape_68, unsqueeze_11) + + # pd_op.reshape: (256x12x49x49xf32) <- (64x4x12x49x49xf32, 4xi64) + reshape_157 = paddle._C_ops.reshape(add_51, full_int_array_66) + + # pd_op.softmax: (256x12x49x49xf32) <- (256x12x49x49xf32) + softmax_7 = paddle._C_ops.softmax(reshape_157, -1) + del reshape_157 + + # pd_op.matmul: (256x12x49x32xf32) <- (256x12x49x49xf32, 256x12x49x32xf32) + matmul_71 = paddle._C_ops.matmul(softmax_7, slice_7, False, False) + + # pd_op.transpose: (256x49x12x32xf32) <- (256x12x49x32xf32) + transpose_47 = paddle._C_ops.transpose(matmul_71, [0, 2, 1, 3]) + del matmul_71 + + # pd_op.reshape: (256x49x384xf32) <- (256x49x12x32xf32, 3xi64) + reshape_69 = paddle._C_ops.reshape(transpose_47, full_int_array_60) + + # pd_op.matmul: (256x49x384xf32) <- (256x49x384xf32, 384x384xf32) + matmul_39 = paddle._C_ops.matmul(reshape_69, parameter_62, False, False) + del parameter_62 + + # pd_op.add: (256x49x384xf32) <- (256x49x384xf32, 384xf32) + add_52 = paddle._C_ops.add(matmul_39, parameter_61) + del parameter_61 + + # pd_op.reshape: (256x7x7x384xf32) <- (256x49x384xf32, 4xi64) + reshape_70 = paddle._C_ops.reshape(add_52, full_int_array_57) + + # pd_op.reshape: (64x2x2x7x7x384xf32) <- (256x7x7x384xf32, 6xi64) + reshape_158 = paddle._C_ops.reshape(reshape_70, full_int_array_61) + + # pd_op.transpose: (64x2x7x2x7x384xf32) <- (64x2x2x7x7x384xf32) + transpose_48 = paddle._C_ops.transpose(reshape_158, [0, 1, 3, 2, 4, 5]) + del reshape_158 + + # pd_op.reshape: (64x14x14x384xf32) <- (64x2x7x2x7x384xf32, 4xi64) + reshape_71 = paddle._C_ops.reshape(transpose_48, full_int_array_62) + + # pd_op.roll: (64x14x14x384xf32) <- (64x14x14x384xf32, 2xi64) + roll_7 = paddle._C_ops.roll(reshape_71, full_int_array_5, [1, 2]) + + # pd_op.reshape: (64x196x384xf32) <- (64x14x14x384xf32, 3xi64) + reshape_72 = paddle._C_ops.reshape(roll_7, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_8 = paddle._C_ops.full( + [], + float("0.872727"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_93 = full_8 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_12 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_17, + full_18, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_102 = paddle._C_ops.add(full_8, uniform_12) + del uniform_12 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_12 = paddle._C_ops.floor(add_102) + del add_102 + + # pd_op.divide: (64x196x384xf32) <- (64x196x384xf32, xf32) + divide_12 = paddle._C_ops.divide(reshape_72, full_8) + + # pd_op.multiply: (64x196x384xf32) <- (64x196x384xf32, 64x1x1xf32) + multiply_12 = paddle._C_ops.multiply(divide_12, floor_12) + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 64x196x384xf32) + add_53 = paddle._C_ops.add(add_48, multiply_12) + + # pd_op.layer_norm: (64x196x384xf32, 64x196xf32, 64x196xf32) <- (64x196x384xf32, 384xf32, 384xf32) + layer_norm_54, layer_norm_55, layer_norm_56 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_53, parameter_60, parameter_59, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_59, parameter_60 + + # pd_op.matmul: (64x196x1536xf32) <- (64x196x384xf32, 384x1536xf32) + matmul_40 = paddle._C_ops.matmul(layer_norm_54, parameter_58, False, False) + del parameter_58 + + # pd_op.add: (64x196x1536xf32) <- (64x196x1536xf32, 1536xf32) + add_54 = paddle._C_ops.add(matmul_40, parameter_57) + del parameter_57 + + # pd_op.gelu: (64x196x1536xf32) <- (64x196x1536xf32) + gelu_7 = paddle._C_ops.gelu(add_54, False) + + # pd_op.matmul: (64x196x384xf32) <- (64x196x1536xf32, 1536x384xf32) + matmul_41 = paddle._C_ops.matmul(gelu_7, parameter_56, False, False) + del parameter_56 + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 384xf32) + add_55 = paddle._C_ops.add(matmul_41, parameter_55) + del parameter_55 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_13 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_17, + full_18, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_103 = paddle._C_ops.add(full_8, uniform_13) + del uniform_13 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_13 = paddle._C_ops.floor(add_103) + del add_103 + + # pd_op.divide: (64x196x384xf32) <- (64x196x384xf32, xf32) + divide_13 = paddle._C_ops.divide(add_55, full_8) + + # pd_op.multiply: (64x196x384xf32) <- (64x196x384xf32, 64x1x1xf32) + multiply_13 = paddle._C_ops.multiply(divide_13, floor_13) + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 64x196x384xf32) + add_56 = paddle._C_ops.add(add_53, multiply_13) + + # pd_op.layer_norm: (64x196x384xf32, 64x196xf32, 64x196xf32) <- (64x196x384xf32, 384xf32, 384xf32) + layer_norm_57, layer_norm_58, layer_norm_59 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_56, parameter_54, parameter_53, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_53, parameter_54 + + # pd_op.reshape: (64x14x14x384xf32) <- (64x196x384xf32, 4xi64) + reshape_73 = paddle._C_ops.reshape(layer_norm_57, full_int_array_55) + + # pd_op.reshape: (64x2x7x2x7x384xf32) <- (64x14x14x384xf32, 6xi64) + reshape_159 = paddle._C_ops.reshape(reshape_73, full_int_array_56) + + # pd_op.transpose: (64x2x2x7x7x384xf32) <- (64x2x7x2x7x384xf32) + transpose_49 = paddle._C_ops.transpose(reshape_159, [0, 1, 3, 2, 4, 5]) + del reshape_159 + + # pd_op.reshape: (256x7x7x384xf32) <- (64x2x2x7x7x384xf32, 4xi64) + reshape_74 = paddle._C_ops.reshape(transpose_49, full_int_array_57) + + # pd_op.reshape: (256x49x384xf32) <- (256x7x7x384xf32, 3xi64) + reshape_75 = paddle._C_ops.reshape(reshape_74, full_int_array_58) + + # pd_op.matmul: (256x49x1152xf32) <- (256x49x384xf32, 384x1152xf32) + matmul_42 = paddle._C_ops.matmul(reshape_75, parameter_52, False, False) + del parameter_52 + + # pd_op.add: (256x49x1152xf32) <- (256x49x1152xf32, 1152xf32) + add_57 = paddle._C_ops.add(matmul_42, parameter_51) + del parameter_51 + + # pd_op.reshape: (256x49x3x12x32xf32) <- (256x49x1152xf32, 5xi64) + reshape_160 = paddle._C_ops.reshape(add_57, full_int_array_59) + + # pd_op.transpose: (3x256x12x49x32xf32) <- (256x49x3x12x32xf32) + transpose_50 = paddle._C_ops.transpose(reshape_160, [2, 0, 3, 1, 4]) + del reshape_160 + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_28 = paddle._C_ops.slice( + transpose_50, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_29 = paddle._C_ops.slice( + transpose_50, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_8 = paddle._C_ops.slice( + transpose_50, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (256x12x49x32xf32) <- (256x12x49x32xf32, 1xf32) + scale_8 = paddle._C_ops.scale(slice_28, full_0, float("0"), True) + del slice_28 + + # pd_op.transpose: (256x12x32x49xf32) <- (256x12x49x32xf32) + transpose_51 = paddle._C_ops.transpose(slice_29, [0, 1, 3, 2]) + del slice_29 + + # pd_op.matmul: (256x12x49x49xf32) <- (256x12x49x32xf32, 256x12x32x49xf32) + matmul_43 = paddle._C_ops.matmul(scale_8, transpose_51, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_76 = paddle._C_ops.reshape(data_15, full_int_array_15) + del data_15 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_8 = paddle._C_ops.index_select(data_16, reshape_76, 0) + del data_16 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_161 = paddle._C_ops.reshape(index_select_8, full_int_array_16) + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_52 = paddle._C_ops.transpose(reshape_161, [2, 0, 1]) + del reshape_161 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_12 = paddle._C_ops.unsqueeze(transpose_52, full_int_array_0) + + # pd_op.add: (256x12x49x49xf32) <- (256x12x49x49xf32, 1x12x49x49xf32) + add_104 = paddle._C_ops.add(matmul_43, unsqueeze_12) + + # pd_op.softmax: (256x12x49x49xf32) <- (256x12x49x49xf32) + softmax_8 = paddle._C_ops.softmax(add_104, -1) + del add_104 + + # pd_op.matmul: (256x12x49x32xf32) <- (256x12x49x49xf32, 256x12x49x32xf32) + matmul_72 = paddle._C_ops.matmul(softmax_8, slice_8, False, False) + + # pd_op.transpose: (256x49x12x32xf32) <- (256x12x49x32xf32) + transpose_53 = paddle._C_ops.transpose(matmul_72, [0, 2, 1, 3]) + del matmul_72 + + # pd_op.reshape: (256x49x384xf32) <- (256x49x12x32xf32, 3xi64) + reshape_77 = paddle._C_ops.reshape(transpose_53, full_int_array_60) + + # pd_op.matmul: (256x49x384xf32) <- (256x49x384xf32, 384x384xf32) + matmul_44 = paddle._C_ops.matmul(reshape_77, parameter_50, False, False) + del parameter_50 + + # pd_op.add: (256x49x384xf32) <- (256x49x384xf32, 384xf32) + add_58 = paddle._C_ops.add(matmul_44, parameter_49) + del parameter_49 + + # pd_op.reshape: (256x7x7x384xf32) <- (256x49x384xf32, 4xi64) + reshape_78 = paddle._C_ops.reshape(add_58, full_int_array_57) + + # pd_op.reshape: (64x2x2x7x7x384xf32) <- (256x7x7x384xf32, 6xi64) + reshape_162 = paddle._C_ops.reshape(reshape_78, full_int_array_61) + + # pd_op.transpose: (64x2x7x2x7x384xf32) <- (64x2x2x7x7x384xf32) + transpose_54 = paddle._C_ops.transpose(reshape_162, [0, 1, 3, 2, 4, 5]) + del reshape_162 + + # pd_op.reshape: (64x14x14x384xf32) <- (64x2x7x2x7x384xf32, 4xi64) + reshape_79 = paddle._C_ops.reshape(transpose_54, full_int_array_62) + + # pd_op.reshape: (64x196x384xf32) <- (64x14x14x384xf32, 3xi64) + reshape_80 = paddle._C_ops.reshape(reshape_79, full_int_array_63) + + # pd_op.full: (xf32) <- () + full_9 = paddle._C_ops.full( + [], + float("0.854545"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_102 = full_9 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_14 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_17, + full_18, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_105 = paddle._C_ops.add(full_9, uniform_14) + del uniform_14 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_14 = paddle._C_ops.floor(add_105) + del add_105 + + # pd_op.divide: (64x196x384xf32) <- (64x196x384xf32, xf32) + divide_14 = paddle._C_ops.divide(reshape_80, full_9) + + # pd_op.multiply: (64x196x384xf32) <- (64x196x384xf32, 64x1x1xf32) + multiply_14 = paddle._C_ops.multiply(divide_14, floor_14) + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 64x196x384xf32) + add_59 = paddle._C_ops.add(add_56, multiply_14) + + # pd_op.layer_norm: (64x196x384xf32, 64x196xf32, 64x196xf32) <- (64x196x384xf32, 384xf32, 384xf32) + layer_norm_60, layer_norm_61, layer_norm_62 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_59, parameter_48, parameter_47, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_47, parameter_48 + + # pd_op.matmul: (64x196x1536xf32) <- (64x196x384xf32, 384x1536xf32) + matmul_45 = paddle._C_ops.matmul(layer_norm_60, parameter_46, False, False) + del parameter_46 + + # pd_op.add: (64x196x1536xf32) <- (64x196x1536xf32, 1536xf32) + add_60 = paddle._C_ops.add(matmul_45, parameter_45) + del parameter_45 + + # pd_op.gelu: (64x196x1536xf32) <- (64x196x1536xf32) + gelu_8 = paddle._C_ops.gelu(add_60, False) + + # pd_op.matmul: (64x196x384xf32) <- (64x196x1536xf32, 1536x384xf32) + matmul_46 = paddle._C_ops.matmul(gelu_8, parameter_44, False, False) + del parameter_44 + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 384xf32) + add_61 = paddle._C_ops.add(matmul_46, parameter_43) + del parameter_43 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_15 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_17, + full_18, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_106 = paddle._C_ops.add(full_9, uniform_15) + del uniform_15 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_15 = paddle._C_ops.floor(add_106) + del add_106 + + # pd_op.divide: (64x196x384xf32) <- (64x196x384xf32, xf32) + divide_15 = paddle._C_ops.divide(add_61, full_9) + + # pd_op.multiply: (64x196x384xf32) <- (64x196x384xf32, 64x1x1xf32) + multiply_15 = paddle._C_ops.multiply(divide_15, floor_15) + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 64x196x384xf32) + add_62 = paddle._C_ops.add(add_59, multiply_15) + + # pd_op.layer_norm: (64x196x384xf32, 64x196xf32, 64x196xf32) <- (64x196x384xf32, 384xf32, 384xf32) + layer_norm_63, layer_norm_64, layer_norm_65 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_62, parameter_42, parameter_41, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_41, parameter_42 + + # pd_op.reshape: (64x14x14x384xf32) <- (64x196x384xf32, 4xi64) + reshape_81 = paddle._C_ops.reshape(layer_norm_63, full_int_array_55) + + # pd_op.roll: (64x14x14x384xf32) <- (64x14x14x384xf32, 2xi64) + roll_8 = paddle._C_ops.roll(reshape_81, full_int_array_4, [1, 2]) + + # pd_op.reshape: (64x2x7x2x7x384xf32) <- (64x14x14x384xf32, 6xi64) + reshape_163 = paddle._C_ops.reshape(roll_8, full_int_array_56) + del full_int_array_56 + + # pd_op.transpose: (64x2x2x7x7x384xf32) <- (64x2x7x2x7x384xf32) + transpose_55 = paddle._C_ops.transpose(reshape_163, [0, 1, 3, 2, 4, 5]) + del reshape_163 + + # pd_op.reshape: (256x7x7x384xf32) <- (64x2x2x7x7x384xf32, 4xi64) + reshape_82 = paddle._C_ops.reshape(transpose_55, full_int_array_57) + + # pd_op.reshape: (256x49x384xf32) <- (256x7x7x384xf32, 3xi64) + reshape_83 = paddle._C_ops.reshape(reshape_82, full_int_array_58) + del full_int_array_58 + + # pd_op.full: (1x14x14x1xf32) <- () + full_26 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__38 = paddle._C_ops.set_value_( + full_26, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_26 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__39 = paddle._C_ops.set_value_( + set_value__38, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__38 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__40 = paddle._C_ops.set_value_( + set_value__39, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__39 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__41 = paddle._C_ops.set_value_( + set_value__40, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__40 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__42 = paddle._C_ops.set_value_( + set_value__41, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__41 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__43 = paddle._C_ops.set_value_( + set_value__42, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__42 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__44 = paddle._C_ops.set_value_( + set_value__43, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__43 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__45 = paddle._C_ops.set_value_( + set_value__44, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__44 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__4 = paddle._C_ops.set_value_( + set_value__45, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__45 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_164 = paddle._C_ops.reshape(set_value__4, full_int_array_64) + del full_int_array_64 + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_78 = paddle._C_ops.transpose(reshape_164, [0, 1, 3, 2, 4, 5]) + del reshape_164 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_165 = paddle._C_ops.reshape(transpose_78, full_int_array_36) + del transpose_78 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_166 = paddle._C_ops.reshape(reshape_165, full_int_array_37) + del reshape_165 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_31 = paddle._C_ops.unsqueeze(reshape_166, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_32 = paddle._C_ops.unsqueeze(reshape_166, full_int_array_2) + del reshape_166 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_4 = paddle._C_ops.subtract(unsqueeze_31, unsqueeze_32) + del unsqueeze_31, unsqueeze_32 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_4 = paddle._C_ops.not_equal(subtract_4, full_14) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_8 = paddle._C_ops.where(not_equal_4, full_23, subtract_4) + del full_23, not_equal_4, subtract_4 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_4 = paddle._C_ops.equal(where_8, full_14) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_9 = paddle._C_ops.where(equal_4, full_24, where_8) + del equal_4, full_24, where_8 + + # pd_op.matmul: (256x49x1152xf32) <- (256x49x384xf32, 384x1152xf32) + matmul_47 = paddle._C_ops.matmul(reshape_83, parameter_40, False, False) + del parameter_40 + + # pd_op.add: (256x49x1152xf32) <- (256x49x1152xf32, 1152xf32) + add_63 = paddle._C_ops.add(matmul_47, parameter_39) + del parameter_39 + + # pd_op.reshape: (256x49x3x12x32xf32) <- (256x49x1152xf32, 5xi64) + reshape_167 = paddle._C_ops.reshape(add_63, full_int_array_59) + del full_int_array_59 + + # pd_op.transpose: (3x256x12x49x32xf32) <- (256x49x3x12x32xf32) + transpose_56 = paddle._C_ops.transpose(reshape_167, [2, 0, 3, 1, 4]) + del reshape_167 + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_30 = paddle._C_ops.slice( + transpose_56, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_31 = paddle._C_ops.slice( + transpose_56, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (256x12x49x32xf32) <- (3x256x12x49x32xf32, 1xi64, 1xi64) + slice_9 = paddle._C_ops.slice( + transpose_56, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (256x12x49x32xf32) <- (256x12x49x32xf32, 1xf32) + scale_9 = paddle._C_ops.scale(slice_30, full_0, float("0"), True) + del slice_30 + + # pd_op.transpose: (256x12x32x49xf32) <- (256x12x49x32xf32) + transpose_57 = paddle._C_ops.transpose(slice_31, [0, 1, 3, 2]) + del slice_31 + + # pd_op.matmul: (256x12x49x49xf32) <- (256x12x49x32xf32, 256x12x32x49xf32) + matmul_48 = paddle._C_ops.matmul(scale_9, transpose_57, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_84 = paddle._C_ops.reshape(data_17, full_int_array_15) + del data_17 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_9 = paddle._C_ops.index_select(data_18, reshape_84, 0) + del data_18 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_168 = paddle._C_ops.reshape(index_select_9, full_int_array_16) + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_58 = paddle._C_ops.transpose(reshape_168, [2, 0, 1]) + del reshape_168 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_13 = paddle._C_ops.unsqueeze(transpose_58, full_int_array_0) + + # pd_op.add: (256x12x49x49xf32) <- (256x12x49x49xf32, 1x12x49x49xf32) + add_64 = paddle._C_ops.add(matmul_48, unsqueeze_13) + + # pd_op.reshape: (64x4x12x49x49xf32) <- (256x12x49x49xf32, 5xi64) + reshape_85 = paddle._C_ops.reshape(add_64, full_int_array_65) + del full_int_array_65 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_33 = paddle._C_ops.unsqueeze(where_9, full_int_array_1) + del where_9 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_14 = paddle._C_ops.unsqueeze(unsqueeze_33, full_int_array_0) + del unsqueeze_33 + + # pd_op.add: (64x4x12x49x49xf32) <- (64x4x12x49x49xf32, 1x4x1x49x49xf32) + add_65 = paddle._C_ops.add(reshape_85, unsqueeze_14) + + # pd_op.reshape: (256x12x49x49xf32) <- (64x4x12x49x49xf32, 4xi64) + reshape_169 = paddle._C_ops.reshape(add_65, full_int_array_66) + del full_int_array_66 + + # pd_op.softmax: (256x12x49x49xf32) <- (256x12x49x49xf32) + softmax_9 = paddle._C_ops.softmax(reshape_169, -1) + del reshape_169 + + # pd_op.matmul: (256x12x49x32xf32) <- (256x12x49x49xf32, 256x12x49x32xf32) + matmul_73 = paddle._C_ops.matmul(softmax_9, slice_9, False, False) + + # pd_op.transpose: (256x49x12x32xf32) <- (256x12x49x32xf32) + transpose_59 = paddle._C_ops.transpose(matmul_73, [0, 2, 1, 3]) + del matmul_73 + + # pd_op.reshape: (256x49x384xf32) <- (256x49x12x32xf32, 3xi64) + reshape_86 = paddle._C_ops.reshape(transpose_59, full_int_array_60) + del full_int_array_60 + + # pd_op.matmul: (256x49x384xf32) <- (256x49x384xf32, 384x384xf32) + matmul_49 = paddle._C_ops.matmul(reshape_86, parameter_38, False, False) + del parameter_38 + + # pd_op.add: (256x49x384xf32) <- (256x49x384xf32, 384xf32) + add_66 = paddle._C_ops.add(matmul_49, parameter_37) + del parameter_37 + + # pd_op.reshape: (256x7x7x384xf32) <- (256x49x384xf32, 4xi64) + reshape_87 = paddle._C_ops.reshape(add_66, full_int_array_57) + del full_int_array_57 + + # pd_op.reshape: (64x2x2x7x7x384xf32) <- (256x7x7x384xf32, 6xi64) + reshape_170 = paddle._C_ops.reshape(reshape_87, full_int_array_61) + del full_int_array_61 + + # pd_op.transpose: (64x2x7x2x7x384xf32) <- (64x2x2x7x7x384xf32) + transpose_60 = paddle._C_ops.transpose(reshape_170, [0, 1, 3, 2, 4, 5]) + del reshape_170 + + # pd_op.reshape: (64x14x14x384xf32) <- (64x2x7x2x7x384xf32, 4xi64) + reshape_88 = paddle._C_ops.reshape(transpose_60, full_int_array_62) + del full_int_array_62 + + # pd_op.roll: (64x14x14x384xf32) <- (64x14x14x384xf32, 2xi64) + roll_9 = paddle._C_ops.roll(reshape_88, full_int_array_5, [1, 2]) + + # pd_op.reshape: (64x196x384xf32) <- (64x14x14x384xf32, 3xi64) + reshape_89 = paddle._C_ops.reshape(roll_9, full_int_array_63) + del full_int_array_63 + + # pd_op.full: (xf32) <- () + full_10 = paddle._C_ops.full( + [], + float("0.836364"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_113 = full_10 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_16 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_17, + full_18, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_107 = paddle._C_ops.add(full_10, uniform_16) + del uniform_16 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_16 = paddle._C_ops.floor(add_107) + del add_107 + + # pd_op.divide: (64x196x384xf32) <- (64x196x384xf32, xf32) + divide_16 = paddle._C_ops.divide(reshape_89, full_10) + + # pd_op.multiply: (64x196x384xf32) <- (64x196x384xf32, 64x1x1xf32) + multiply_16 = paddle._C_ops.multiply(divide_16, floor_16) + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 64x196x384xf32) + add_67 = paddle._C_ops.add(add_62, multiply_16) + + # pd_op.layer_norm: (64x196x384xf32, 64x196xf32, 64x196xf32) <- (64x196x384xf32, 384xf32, 384xf32) + layer_norm_66, layer_norm_67, layer_norm_68 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_67, parameter_36, parameter_35, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_35, parameter_36 + + # pd_op.matmul: (64x196x1536xf32) <- (64x196x384xf32, 384x1536xf32) + matmul_50 = paddle._C_ops.matmul(layer_norm_66, parameter_34, False, False) + del parameter_34 + + # pd_op.add: (64x196x1536xf32) <- (64x196x1536xf32, 1536xf32) + add_68 = paddle._C_ops.add(matmul_50, parameter_33) + del parameter_33 + + # pd_op.gelu: (64x196x1536xf32) <- (64x196x1536xf32) + gelu_9 = paddle._C_ops.gelu(add_68, False) + + # pd_op.matmul: (64x196x384xf32) <- (64x196x1536xf32, 1536x384xf32) + matmul_51 = paddle._C_ops.matmul(gelu_9, parameter_32, False, False) + del parameter_32 + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 384xf32) + add_69 = paddle._C_ops.add(matmul_51, parameter_31) + del parameter_31 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_17 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_17, + full_18, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_108 = paddle._C_ops.add(full_10, uniform_17) + del uniform_17 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_17 = paddle._C_ops.floor(add_108) + del add_108 + + # pd_op.divide: (64x196x384xf32) <- (64x196x384xf32, xf32) + divide_17 = paddle._C_ops.divide(add_69, full_10) + + # pd_op.multiply: (64x196x384xf32) <- (64x196x384xf32, 64x1x1xf32) + multiply_17 = paddle._C_ops.multiply(divide_17, floor_17) + + # pd_op.add: (64x196x384xf32) <- (64x196x384xf32, 64x196x384xf32) + add_70 = paddle._C_ops.add(add_67, multiply_17) + + # pd_op.reshape: (64x14x14x384xf32) <- (64x196x384xf32, 4xi64) + reshape_90 = paddle._C_ops.reshape(add_70, full_int_array_55) + + # pd_op.strided_slice: (64x7x7x384xf32) <- (64x14x14x384xf32, 2xi64, 2xi64, 2xi64) + strided_slice_8 = paddle._C_ops.strided_slice( + reshape_90, [1, 2], full_int_array_21, full_int_array_34, full_int_array_6 + ) + + # pd_op.strided_slice: (64x7x7x384xf32) <- (64x14x14x384xf32, 2xi64, 2xi64, 2xi64) + strided_slice_9 = paddle._C_ops.strided_slice( + reshape_90, [1, 2], full_int_array_7, full_int_array_34, full_int_array_6 + ) + + # pd_op.strided_slice: (64x7x7x384xf32) <- (64x14x14x384xf32, 2xi64, 2xi64, 2xi64) + strided_slice_10 = paddle._C_ops.strided_slice( + reshape_90, [1, 2], full_int_array_8, full_int_array_34, full_int_array_6 + ) + + # pd_op.strided_slice: (64x7x7x384xf32) <- (64x14x14x384xf32, 2xi64, 2xi64, 2xi64) + strided_slice_11 = paddle._C_ops.strided_slice( + reshape_90, [1, 2], full_int_array_23, full_int_array_34, full_int_array_6 + ) + + # pd_op.reshape: (64x14x14x384xf32) <- (64x14x14x384xf32, 4xi64) + reshape_171 = paddle._C_ops.reshape(reshape_90, full_int_array_55) + del full_int_array_55 + + # builtin.combine: ([64x7x7x384xf32, 64x7x7x384xf32, 64x7x7x384xf32, 64x7x7x384xf32]) <- (64x7x7x384xf32, 64x7x7x384xf32, 64x7x7x384xf32, 64x7x7x384xf32) + combine_2 = [ + strided_slice_8, + strided_slice_9, + strided_slice_10, + strided_slice_11, + ] + + # pd_op.concat: (64x7x7x1536xf32) <- ([64x7x7x384xf32, 64x7x7x384xf32, 64x7x7x384xf32, 64x7x7x384xf32], 1xi32) + concat_2 = paddle._C_ops.concat(combine_2, full_2) + del combine_2 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_67 = [64, -1, 1536] + + # pd_op.reshape: (64x49x1536xf32) <- (64x7x7x1536xf32, 3xi64) + reshape_91 = paddle._C_ops.reshape(concat_2, full_int_array_67) + del full_int_array_67 + + # pd_op.layer_norm: (64x49x1536xf32, 64x49xf32, 64x49xf32) <- (64x49x1536xf32, 1536xf32, 1536xf32) + layer_norm_69, layer_norm_70, layer_norm_71 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + reshape_91, parameter_30, parameter_29, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_29, parameter_30 + + # pd_op.matmul: (64x49x768xf32) <- (64x49x1536xf32, 1536x768xf32) + matmul_52 = paddle._C_ops.matmul(layer_norm_69, parameter_28, False, False) + del parameter_28 + + # pd_op.layer_norm: (64x49x768xf32, 64x49xf32, 64x49xf32) <- (64x49x768xf32, 768xf32, 768xf32) + layer_norm_72, layer_norm_73, layer_norm_74 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + matmul_52, parameter_27, parameter_26, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_26, parameter_27 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_68 = [64, 7, 7, 768] + + # pd_op.reshape: (64x7x7x768xf32) <- (64x49x768xf32, 4xi64) + reshape_92 = paddle._C_ops.reshape(layer_norm_72, full_int_array_68) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_69 = [64, 1, 7, 1, 7, 768] + + # pd_op.reshape: (64x1x7x1x7x768xf32) <- (64x7x7x768xf32, 6xi64) + reshape_172 = paddle._C_ops.reshape(reshape_92, full_int_array_69) + + # pd_op.transpose: (64x1x1x7x7x768xf32) <- (64x1x7x1x7x768xf32) + transpose_61 = paddle._C_ops.transpose(reshape_172, [0, 1, 3, 2, 4, 5]) + del reshape_172 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_70 = [-1, 7, 7, 768] + + # pd_op.reshape: (64x7x7x768xf32) <- (64x1x1x7x7x768xf32, 4xi64) + reshape_93 = paddle._C_ops.reshape(transpose_61, full_int_array_70) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_71 = [-1, 49, 768] + + # pd_op.reshape: (64x49x768xf32) <- (64x7x7x768xf32, 3xi64) + reshape_94 = paddle._C_ops.reshape(reshape_93, full_int_array_71) + + # pd_op.matmul: (64x49x2304xf32) <- (64x49x768xf32, 768x2304xf32) + matmul_53 = paddle._C_ops.matmul(reshape_94, parameter_25, False, False) + del parameter_25 + + # pd_op.add: (64x49x2304xf32) <- (64x49x2304xf32, 2304xf32) + add_71 = paddle._C_ops.add(matmul_53, parameter_24) + del parameter_24 + + # pd_op.full_int_array: (5xi64) <- () + full_int_array_72 = [64, 49, 3, 24, 32] + + # pd_op.reshape: (64x49x3x24x32xf32) <- (64x49x2304xf32, 5xi64) + reshape_173 = paddle._C_ops.reshape(add_71, full_int_array_72) + + # pd_op.transpose: (3x64x24x49x32xf32) <- (64x49x3x24x32xf32) + transpose_62 = paddle._C_ops.transpose(reshape_173, [2, 0, 3, 1, 4]) + del reshape_173 + + # pd_op.slice: (64x24x49x32xf32) <- (3x64x24x49x32xf32, 1xi64, 1xi64) + slice_32 = paddle._C_ops.slice( + transpose_62, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (64x24x49x32xf32) <- (3x64x24x49x32xf32, 1xi64, 1xi64) + slice_33 = paddle._C_ops.slice( + transpose_62, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (64x24x49x32xf32) <- (3x64x24x49x32xf32, 1xi64, 1xi64) + slice_10 = paddle._C_ops.slice( + transpose_62, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (64x24x49x32xf32) <- (64x24x49x32xf32, 1xf32) + scale_10 = paddle._C_ops.scale(slice_32, full_0, float("0"), True) + del slice_32 + + # pd_op.transpose: (64x24x32x49xf32) <- (64x24x49x32xf32) + transpose_63 = paddle._C_ops.transpose(slice_33, [0, 1, 3, 2]) + del slice_33 + + # pd_op.matmul: (64x24x49x49xf32) <- (64x24x49x32xf32, 64x24x32x49xf32) + matmul_54 = paddle._C_ops.matmul(scale_10, transpose_63, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_95 = paddle._C_ops.reshape(data_19, full_int_array_15) + del data_19 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_10 = paddle._C_ops.index_select(data_20, reshape_95, 0) + del data_20 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_174 = paddle._C_ops.reshape(index_select_10, full_int_array_16) + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_64 = paddle._C_ops.transpose(reshape_174, [2, 0, 1]) + del reshape_174 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_15 = paddle._C_ops.unsqueeze(transpose_64, full_int_array_0) + + # pd_op.add: (64x24x49x49xf32) <- (64x24x49x49xf32, 1x24x49x49xf32) + add_109 = paddle._C_ops.add(matmul_54, unsqueeze_15) + + # pd_op.softmax: (64x24x49x49xf32) <- (64x24x49x49xf32) + softmax_10 = paddle._C_ops.softmax(add_109, -1) + del add_109 + + # pd_op.matmul: (64x24x49x32xf32) <- (64x24x49x49xf32, 64x24x49x32xf32) + matmul_74 = paddle._C_ops.matmul(softmax_10, slice_10, False, False) + + # pd_op.transpose: (64x49x24x32xf32) <- (64x24x49x32xf32) + transpose_65 = paddle._C_ops.transpose(matmul_74, [0, 2, 1, 3]) + del matmul_74 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_73 = [64, 49, 768] + + # pd_op.reshape: (64x49x768xf32) <- (64x49x24x32xf32, 3xi64) + reshape_96 = paddle._C_ops.reshape(transpose_65, full_int_array_73) + + # pd_op.matmul: (64x49x768xf32) <- (64x49x768xf32, 768x768xf32) + matmul_55 = paddle._C_ops.matmul(reshape_96, parameter_23, False, False) + del parameter_23 + + # pd_op.add: (64x49x768xf32) <- (64x49x768xf32, 768xf32) + add_72 = paddle._C_ops.add(matmul_55, parameter_22) + del parameter_22 + + # pd_op.reshape: (64x7x7x768xf32) <- (64x49x768xf32, 4xi64) + reshape_97 = paddle._C_ops.reshape(add_72, full_int_array_70) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_74 = [-1, 1, 1, 7, 7, 768] + + # pd_op.reshape: (64x1x1x7x7x768xf32) <- (64x7x7x768xf32, 6xi64) + reshape_175 = paddle._C_ops.reshape(reshape_97, full_int_array_74) + + # pd_op.transpose: (64x1x7x1x7x768xf32) <- (64x1x1x7x7x768xf32) + transpose_66 = paddle._C_ops.transpose(reshape_175, [0, 1, 3, 2, 4, 5]) + del reshape_175 + + # pd_op.reshape: (64x7x7x768xf32) <- (64x1x7x1x7x768xf32, 4xi64) + reshape_98 = paddle._C_ops.reshape(transpose_66, full_int_array_70) + + # pd_op.reshape: (64x49x768xf32) <- (64x7x7x768xf32, 3xi64) + reshape_99 = paddle._C_ops.reshape(reshape_98, full_int_array_73) + + # pd_op.full: (xf32) <- () + full_11 = paddle._C_ops.full( + [], + float("0.818182"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_135 = full_11 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_18 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_17, + full_18, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_110 = paddle._C_ops.add(full_11, uniform_18) + del uniform_18 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_18 = paddle._C_ops.floor(add_110) + del add_110 + + # pd_op.divide: (64x49x768xf32) <- (64x49x768xf32, xf32) + divide_18 = paddle._C_ops.divide(reshape_99, full_11) + + # pd_op.multiply: (64x49x768xf32) <- (64x49x768xf32, 64x1x1xf32) + multiply_18 = paddle._C_ops.multiply(divide_18, floor_18) + + # pd_op.add: (64x49x768xf32) <- (64x49x768xf32, 64x49x768xf32) + add_73 = paddle._C_ops.add(matmul_52, multiply_18) + + # pd_op.layer_norm: (64x49x768xf32, 64x49xf32, 64x49xf32) <- (64x49x768xf32, 768xf32, 768xf32) + layer_norm_75, layer_norm_76, layer_norm_77 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_73, parameter_21, parameter_20, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_20, parameter_21 + + # pd_op.matmul: (64x49x3072xf32) <- (64x49x768xf32, 768x3072xf32) + matmul_56 = paddle._C_ops.matmul(layer_norm_75, parameter_19, False, False) + del parameter_19 + + # pd_op.add: (64x49x3072xf32) <- (64x49x3072xf32, 3072xf32) + add_74 = paddle._C_ops.add(matmul_56, parameter_18) + del parameter_18 + + # pd_op.gelu: (64x49x3072xf32) <- (64x49x3072xf32) + gelu_10 = paddle._C_ops.gelu(add_74, False) + + # pd_op.matmul: (64x49x768xf32) <- (64x49x3072xf32, 3072x768xf32) + matmul_57 = paddle._C_ops.matmul(gelu_10, parameter_17, False, False) + del parameter_17 + + # pd_op.add: (64x49x768xf32) <- (64x49x768xf32, 768xf32) + add_75 = paddle._C_ops.add(matmul_57, parameter_16) + del parameter_16 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_19 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_17, + full_18, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_111 = paddle._C_ops.add(full_11, uniform_19) + del uniform_19 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_19 = paddle._C_ops.floor(add_111) + del add_111 + + # pd_op.divide: (64x49x768xf32) <- (64x49x768xf32, xf32) + divide_19 = paddle._C_ops.divide(add_75, full_11) + + # pd_op.multiply: (64x49x768xf32) <- (64x49x768xf32, 64x1x1xf32) + multiply_19 = paddle._C_ops.multiply(divide_19, floor_19) + + # pd_op.add: (64x49x768xf32) <- (64x49x768xf32, 64x49x768xf32) + add_76 = paddle._C_ops.add(add_73, multiply_19) + + # pd_op.layer_norm: (64x49x768xf32, 64x49xf32, 64x49xf32) <- (64x49x768xf32, 768xf32, 768xf32) + layer_norm_78, layer_norm_79, layer_norm_80 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_76, parameter_15, parameter_14, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_14, parameter_15 + + # pd_op.reshape: (64x7x7x768xf32) <- (64x49x768xf32, 4xi64) + reshape_100 = paddle._C_ops.reshape(layer_norm_78, full_int_array_68) + del full_int_array_68 + + # pd_op.roll: (64x7x7x768xf32) <- (64x7x7x768xf32, 2xi64) + roll_10 = paddle._C_ops.roll(reshape_100, full_int_array_4, [1, 2]) + + # pd_op.reshape: (64x1x7x1x7x768xf32) <- (64x7x7x768xf32, 6xi64) + reshape_176 = paddle._C_ops.reshape(roll_10, full_int_array_69) + del full_int_array_69 + + # pd_op.transpose: (64x1x1x7x7x768xf32) <- (64x1x7x1x7x768xf32) + transpose_67 = paddle._C_ops.transpose(reshape_176, [0, 1, 3, 2, 4, 5]) + del reshape_176 + + # pd_op.reshape: (64x7x7x768xf32) <- (64x1x1x7x7x768xf32, 4xi64) + reshape_101 = paddle._C_ops.reshape(transpose_67, full_int_array_70) + + # pd_op.reshape: (64x49x768xf32) <- (64x7x7x768xf32, 3xi64) + reshape_102 = paddle._C_ops.reshape(reshape_101, full_int_array_71) + del full_int_array_71 + + # pd_op.full: (1x7x7x1xf32) <- () + full_27 = paddle._C_ops.full( + [1, 7, 7, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__46 = paddle._C_ops.set_value_( + full_27, + full_int_array_21, + full_int_array_22, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_27, full_int_array_21 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__47 = paddle._C_ops.set_value_( + set_value__46, + full_int_array_24, + full_int_array_25, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del full_int_array_24, set_value__46 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__48 = paddle._C_ops.set_value_( + set_value__47, + full_int_array_26, + full_int_array_27, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del full_int_array_26, full_int_array_27, set_value__47 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__49 = paddle._C_ops.set_value_( + set_value__48, + full_int_array_28, + full_int_array_29, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del full_int_array_28, set_value__48 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__50 = paddle._C_ops.set_value_( + set_value__49, + full_int_array_22, + full_int_array_4, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del full_int_array_22, set_value__49 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__51 = paddle._C_ops.set_value_( + set_value__50, + full_int_array_25, + full_int_array_30, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del full_int_array_25, full_int_array_30, set_value__50 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__52 = paddle._C_ops.set_value_( + set_value__51, + full_int_array_31, + full_int_array_32, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del full_int_array_31, full_int_array_32, set_value__51 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__53 = paddle._C_ops.set_value_( + set_value__52, + full_int_array_29, + full_int_array_33, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del full_int_array_29, full_int_array_33, set_value__52 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__5 = paddle._C_ops.set_value_( + set_value__53, + full_int_array_4, + full_int_array_34, + full_int_array_23, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del full_int_array_34, set_value__53 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_75 = [1, 1, 7, 1, 7, 1] + + # pd_op.reshape: (1x1x7x1x7x1xf32) <- (1x7x7x1xf32, 6xi64) + reshape_177 = paddle._C_ops.reshape(set_value__5, full_int_array_75) + del full_int_array_75 + + # pd_op.transpose: (1x1x1x7x7x1xf32) <- (1x1x7x1x7x1xf32) + transpose_79 = paddle._C_ops.transpose(reshape_177, [0, 1, 3, 2, 4, 5]) + del reshape_177 + + # pd_op.reshape: (1x7x7x1xf32) <- (1x1x1x7x7x1xf32, 4xi64) + reshape_178 = paddle._C_ops.reshape(transpose_79, full_int_array_36) + del full_int_array_36, transpose_79 + + # pd_op.reshape: (1x49xf32) <- (1x7x7x1xf32, 2xi64) + reshape_179 = paddle._C_ops.reshape(reshape_178, full_int_array_37) + del full_int_array_37, reshape_178 + + # pd_op.unsqueeze: (1x1x49xf32) <- (1x49xf32, 1xi64) + unsqueeze_34 = paddle._C_ops.unsqueeze(reshape_179, full_int_array_1) + + # pd_op.unsqueeze: (1x49x1xf32) <- (1x49xf32, 1xi64) + unsqueeze_35 = paddle._C_ops.unsqueeze(reshape_179, full_int_array_2) + del reshape_179 + + # pd_op.subtract: (1x49x49xf32) <- (1x1x49xf32, 1x49x1xf32) + subtract_5 = paddle._C_ops.subtract(unsqueeze_34, unsqueeze_35) + del unsqueeze_34, unsqueeze_35 + + # pd_op.not_equal: (1x49x49xb) <- (1x49x49xf32, xf32) + not_equal_5 = paddle._C_ops.not_equal(subtract_5, full_14) + + # pd_op.full: (1x49x49xf32) <- () + full_28 = paddle._C_ops.full( + [1, 49, 49], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (1x49x49xf32) <- (1x49x49xb, 1x49x49xf32, 1x49x49xf32) + where_10 = paddle._C_ops.where(not_equal_5, full_28, subtract_5) + del full_28, not_equal_5, subtract_5 + + # pd_op.equal: (1x49x49xb) <- (1x49x49xf32, xf32) + equal_5 = paddle._C_ops.equal(where_10, full_14) + del full_14 + + # pd_op.full: (1x49x49xf32) <- () + full_29 = paddle._C_ops.full( + [1, 49, 49], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (1x49x49xf32) <- (1x49x49xb, 1x49x49xf32, 1x49x49xf32) + where_11 = paddle._C_ops.where(equal_5, full_29, where_10) + del equal_5, full_29, where_10 + + # pd_op.matmul: (64x49x2304xf32) <- (64x49x768xf32, 768x2304xf32) + matmul_58 = paddle._C_ops.matmul(reshape_102, parameter_13, False, False) + del parameter_13 + + # pd_op.add: (64x49x2304xf32) <- (64x49x2304xf32, 2304xf32) + add_77 = paddle._C_ops.add(matmul_58, parameter_12) + del parameter_12 + + # pd_op.reshape: (64x49x3x24x32xf32) <- (64x49x2304xf32, 5xi64) + reshape_180 = paddle._C_ops.reshape(add_77, full_int_array_72) + del full_int_array_72 + + # pd_op.transpose: (3x64x24x49x32xf32) <- (64x49x3x24x32xf32) + transpose_68 = paddle._C_ops.transpose(reshape_180, [2, 0, 3, 1, 4]) + del reshape_180 + + # pd_op.slice: (64x24x49x32xf32) <- (3x64x24x49x32xf32, 1xi64, 1xi64) + slice_34 = paddle._C_ops.slice( + transpose_68, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (64x24x49x32xf32) <- (3x64x24x49x32xf32, 1xi64, 1xi64) + slice_35 = paddle._C_ops.slice( + transpose_68, [0], full_int_array_1, full_int_array_2, [1], [0] + ) + + # pd_op.slice: (64x24x49x32xf32) <- (3x64x24x49x32xf32, 1xi64, 1xi64) + slice_11 = paddle._C_ops.slice( + transpose_68, [0], full_int_array_2, full_int_array_3, [1], [0] + ) + + # pd_op.scale: (64x24x49x32xf32) <- (64x24x49x32xf32, 1xf32) + scale_11 = paddle._C_ops.scale(slice_34, full_0, float("0"), True) + del slice_34 + + # pd_op.transpose: (64x24x32x49xf32) <- (64x24x49x32xf32) + transpose_69 = paddle._C_ops.transpose(slice_35, [0, 1, 3, 2]) + del slice_35 + + # pd_op.matmul: (64x24x49x49xf32) <- (64x24x49x32xf32, 64x24x32x49xf32) + matmul_59 = paddle._C_ops.matmul(scale_11, transpose_69, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_103 = paddle._C_ops.reshape(data_21, full_int_array_15) + del data_21, full_int_array_15 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_11 = paddle._C_ops.index_select(data_22, reshape_103, 0) + del data_22 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_181 = paddle._C_ops.reshape(index_select_11, full_int_array_16) + del full_int_array_16 + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_70 = paddle._C_ops.transpose(reshape_181, [2, 0, 1]) + del reshape_181 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_16 = paddle._C_ops.unsqueeze(transpose_70, full_int_array_0) + + # pd_op.add: (64x24x49x49xf32) <- (64x24x49x49xf32, 1x24x49x49xf32) + add_78 = paddle._C_ops.add(matmul_59, unsqueeze_16) + + # pd_op.full_int_array: (5xi64) <- () + full_int_array_76 = [64, 1, 24, 49, 49] + + # pd_op.reshape: (64x1x24x49x49xf32) <- (64x24x49x49xf32, 5xi64) + reshape_104 = paddle._C_ops.reshape(add_78, full_int_array_76) + del full_int_array_76 + + # pd_op.unsqueeze: (1x1x49x49xf32) <- (1x49x49xf32, 1xi64) + unsqueeze_36 = paddle._C_ops.unsqueeze(where_11, full_int_array_1) + del where_11 + + # pd_op.unsqueeze: (1x1x1x49x49xf32) <- (1x1x49x49xf32, 1xi64) + unsqueeze_17 = paddle._C_ops.unsqueeze(unsqueeze_36, full_int_array_0) + del unsqueeze_36 + + # pd_op.add: (64x1x24x49x49xf32) <- (64x1x24x49x49xf32, 1x1x1x49x49xf32) + add_79 = paddle._C_ops.add(reshape_104, unsqueeze_17) + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_77 = [64, 24, 49, 49] + + # pd_op.reshape: (64x24x49x49xf32) <- (64x1x24x49x49xf32, 4xi64) + reshape_182 = paddle._C_ops.reshape(add_79, full_int_array_77) + del full_int_array_77 + + # pd_op.softmax: (64x24x49x49xf32) <- (64x24x49x49xf32) + softmax_11 = paddle._C_ops.softmax(reshape_182, -1) + del reshape_182 + + # pd_op.matmul: (64x24x49x32xf32) <- (64x24x49x49xf32, 64x24x49x32xf32) + matmul_75 = paddle._C_ops.matmul(softmax_11, slice_11, False, False) + + # pd_op.transpose: (64x49x24x32xf32) <- (64x24x49x32xf32) + transpose_71 = paddle._C_ops.transpose(matmul_75, [0, 2, 1, 3]) + del matmul_75 + + # pd_op.reshape: (64x49x768xf32) <- (64x49x24x32xf32, 3xi64) + reshape_105 = paddle._C_ops.reshape(transpose_71, full_int_array_73) + + # pd_op.matmul: (64x49x768xf32) <- (64x49x768xf32, 768x768xf32) + matmul_60 = paddle._C_ops.matmul(reshape_105, parameter_11, False, False) + del parameter_11 + + # pd_op.add: (64x49x768xf32) <- (64x49x768xf32, 768xf32) + add_80 = paddle._C_ops.add(matmul_60, parameter_10) + del parameter_10 + + # pd_op.reshape: (64x7x7x768xf32) <- (64x49x768xf32, 4xi64) + reshape_106 = paddle._C_ops.reshape(add_80, full_int_array_70) + + # pd_op.reshape: (64x1x1x7x7x768xf32) <- (64x7x7x768xf32, 6xi64) + reshape_183 = paddle._C_ops.reshape(reshape_106, full_int_array_74) + del full_int_array_74 + + # pd_op.transpose: (64x1x7x1x7x768xf32) <- (64x1x1x7x7x768xf32) + transpose_72 = paddle._C_ops.transpose(reshape_183, [0, 1, 3, 2, 4, 5]) + del reshape_183 + + # pd_op.reshape: (64x7x7x768xf32) <- (64x1x7x1x7x768xf32, 4xi64) + reshape_107 = paddle._C_ops.reshape(transpose_72, full_int_array_70) + del full_int_array_70 + + # pd_op.roll: (64x7x7x768xf32) <- (64x7x7x768xf32, 2xi64) + roll_11 = paddle._C_ops.roll(reshape_107, full_int_array_5, [1, 2]) + + # pd_op.reshape: (64x49x768xf32) <- (64x7x7x768xf32, 3xi64) + reshape_108 = paddle._C_ops.reshape(roll_11, full_int_array_73) + del full_int_array_73 + + # pd_op.full: (xf32) <- () + full_12 = paddle._C_ops.full( + [], float("0.8"), paddle.float32, paddle.framework._current_expected_place() + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_146 = full_12 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_20 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_17, + full_18, + 0, + paddle.framework._current_expected_place(), + ) + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_112 = paddle._C_ops.add(full_12, uniform_20) + del uniform_20 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_20 = paddle._C_ops.floor(add_112) + del add_112 + + # pd_op.divide: (64x49x768xf32) <- (64x49x768xf32, xf32) + divide_20 = paddle._C_ops.divide(reshape_108, full_12) + + # pd_op.multiply: (64x49x768xf32) <- (64x49x768xf32, 64x1x1xf32) + multiply_20 = paddle._C_ops.multiply(divide_20, floor_20) + + # pd_op.add: (64x49x768xf32) <- (64x49x768xf32, 64x49x768xf32) + add_81 = paddle._C_ops.add(add_76, multiply_20) + + # pd_op.layer_norm: (64x49x768xf32, 64x49xf32, 64x49xf32) <- (64x49x768xf32, 768xf32, 768xf32) + layer_norm_81, layer_norm_82, layer_norm_83 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_81, parameter_9, parameter_8, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_8, parameter_9 + + # pd_op.matmul: (64x49x3072xf32) <- (64x49x768xf32, 768x3072xf32) + matmul_61 = paddle._C_ops.matmul(layer_norm_81, parameter_7, False, False) + del parameter_7 + + # pd_op.add: (64x49x3072xf32) <- (64x49x3072xf32, 3072xf32) + add_82 = paddle._C_ops.add(matmul_61, parameter_6) + del parameter_6 + + # pd_op.gelu: (64x49x3072xf32) <- (64x49x3072xf32) + gelu_11 = paddle._C_ops.gelu(add_82, False) + + # pd_op.matmul: (64x49x768xf32) <- (64x49x3072xf32, 3072x768xf32) + matmul_62 = paddle._C_ops.matmul(gelu_11, parameter_5, False, False) + del parameter_5 + + # pd_op.add: (64x49x768xf32) <- (64x49x768xf32, 768xf32) + add_83 = paddle._C_ops.add(matmul_62, parameter_4) + del parameter_4 + + # pd_op.uniform: (64x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_21 = paddle._C_ops.uniform( + full_int_array_40, + paddle.float32, + full_17, + full_18, + 0, + paddle.framework._current_expected_place(), + ) + del full_17, full_18, full_int_array_40 + + # pd_op.add: (64x1x1xf32) <- (xf32, 64x1x1xf32) + add_113 = paddle._C_ops.add(full_12, uniform_21) + del uniform_21 + + # pd_op.floor: (64x1x1xf32) <- (64x1x1xf32) + floor_21 = paddle._C_ops.floor(add_113) + del add_113 + + # pd_op.divide: (64x49x768xf32) <- (64x49x768xf32, xf32) + divide_21 = paddle._C_ops.divide(add_83, full_12) + + # pd_op.multiply: (64x49x768xf32) <- (64x49x768xf32, 64x1x1xf32) + multiply_21 = paddle._C_ops.multiply(divide_21, floor_21) + + # pd_op.add: (64x49x768xf32) <- (64x49x768xf32, 64x49x768xf32) + add_84 = paddle._C_ops.add(add_81, multiply_21) + + # pd_op.layer_norm: (64x49x768xf32, 64x49xf32, 64x49xf32) <- (64x49x768xf32, 768xf32, 768xf32) + layer_norm_86, layer_norm_84, layer_norm_85 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_84, parameter_3, parameter_2, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_2, parameter_3 + + # pd_op.transpose: (64x768x49xf32) <- (64x49x768xf32) + transpose_73 = paddle._C_ops.transpose(layer_norm_86, [0, 2, 1]) + del layer_norm_86 + + # pd_op.unsqueeze: (64x768x1x49xf32) <- (64x768x49xf32, 1xi64) + unsqueeze_18 = paddle._C_ops.unsqueeze(transpose_73, full_int_array_2) + + # pd_op.pool2d: (64x768x1x1xf32) <- (64x768x1x49xf32, 2xi64) + pool2d_0 = paddle._C_ops.pool2d( + unsqueeze_18, + full_int_array_23, + [1, 1], + [0, 0], + False, + True, + "NCHW", + "avg", + False, + True, + "EXPLICIT", + ) + del full_int_array_23 + + # pd_op.squeeze: (64x768x1xf32) <- (64x768x1x1xf32, 1xi64) + squeeze_0 = paddle._C_ops.squeeze(pool2d_0, full_int_array_2) + + # pd_op.flatten: (64x768xf32) <- (64x768x1xf32) + flatten_0 = paddle._C_ops.flatten(squeeze_0, 1, 2) + + # pd_op.matmul: (64x102xf32) <- (64x768xf32, 768x102xf32) + matmul_63 = paddle._C_ops.matmul(flatten_0, parameter_1, False, False) + del parameter_1 + + # pd_op.add: (64x102xf32) <- (64x102xf32, 102xf32) + add_85 = paddle._C_ops.add(matmul_63, parameter_0) + del ( + assign_0, + assign_1, + assign_10, + assign_101, + assign_103, + assign_104, + assign_105, + assign_106, + assign_107, + assign_108, + assign_109, + assign_111, + assign_112, + assign_114, + assign_115, + assign_116, + assign_117, + assign_118, + assign_119, + assign_12, + assign_120, + assign_121, + assign_122, + assign_123, + assign_124, + assign_125, + assign_127, + assign_128, + assign_129, + assign_13, + assign_130, + assign_131, + assign_132, + assign_134, + assign_136, + assign_137, + assign_138, + assign_139, + assign_14, + assign_140, + assign_141, + assign_142, + assign_144, + assign_145, + assign_147, + assign_148, + assign_149, + assign_15, + assign_16, + assign_17, + assign_18, + assign_19, + assign_2, + assign_20, + assign_21, + assign_22, + assign_23, + assign_24, + assign_25, + assign_26, + assign_28, + assign_3, + assign_30, + assign_31, + assign_32, + assign_33, + assign_34, + assign_35, + assign_36, + assign_38, + assign_39, + assign_4, + assign_41, + assign_42, + assign_43, + assign_44, + assign_45, + assign_46, + assign_47, + assign_48, + assign_49, + assign_5, + assign_50, + assign_51, + assign_52, + assign_54, + assign_55, + assign_56, + assign_57, + assign_58, + assign_59, + assign_6, + assign_61, + assign_63, + assign_64, + assign_65, + assign_66, + assign_67, + assign_68, + assign_69, + assign_7, + assign_71, + assign_72, + assign_74, + assign_75, + assign_76, + assign_77, + assign_78, + assign_79, + assign_8, + assign_81, + assign_83, + assign_84, + assign_85, + assign_86, + assign_87, + assign_88, + assign_89, + assign_91, + assign_92, + assign_94, + assign_95, + assign_96, + assign_97, + assign_98, + assign_99, + full_int_array_0, + full_int_array_1, + full_int_array_2, + full_int_array_3, + full_int_array_4, + full_int_array_5, + full_int_array_6, + full_int_array_7, + full_int_array_8, + parameter_0, + ) + + return ( + conv2d_0, + reshape_0, + add_0, + transpose_0, + layer_norm_0, + layer_norm_1, + layer_norm_2, + layer_norm_3, + layer_norm_4, + layer_norm_5, + reshape_1, + transpose_1, + reshape_2, + reshape_3, + matmul_0, + add_1, + transpose_2, + slice_0, + full_0, + scale_0, + transpose_3, + matmul_1, + reshape_4, + index_select_0, + transpose_4, + unsqueeze_0, + softmax_0, + transpose_5, + reshape_5, + matmul_2, + add_2, + reshape_6, + transpose_6, + reshape_7, + reshape_8, + add_3, + layer_norm_6, + layer_norm_7, + layer_norm_8, + matmul_3, + add_4, + gelu_0, + matmul_4, + add_5, + add_6, + layer_norm_9, + layer_norm_10, + layer_norm_11, + reshape_9, + roll_0, + transpose_7, + reshape_10, + reshape_11, + matmul_5, + add_7, + transpose_8, + slice_1, + assign_9, + scale_1, + transpose_9, + matmul_6, + reshape_12, + index_select_1, + transpose_10, + unsqueeze_1, + add_8, + reshape_13, + unsqueeze_2, + add_9, + softmax_1, + transpose_11, + reshape_14, + matmul_7, + add_10, + reshape_15, + transpose_12, + reshape_16, + roll_1, + reshape_17, + full_1, + floor_0, + divide_0, + multiply_0, + add_11, + layer_norm_12, + layer_norm_13, + layer_norm_14, + matmul_8, + add_12, + gelu_1, + matmul_9, + add_13, + assign_11, + floor_1, + divide_1, + multiply_1, + add_14, + reshape_18, + strided_slice_0, + strided_slice_1, + strided_slice_2, + strided_slice_3, + full_2, + concat_0, + reshape_19, + layer_norm_15, + layer_norm_16, + layer_norm_17, + matmul_10, + layer_norm_18, + layer_norm_19, + layer_norm_20, + reshape_20, + transpose_13, + reshape_21, + reshape_22, + matmul_11, + add_15, + transpose_14, + slice_2, + assign_27, + scale_2, + transpose_15, + matmul_12, + reshape_23, + index_select_2, + transpose_16, + unsqueeze_3, + softmax_2, + transpose_17, + reshape_24, + matmul_13, + add_16, + reshape_25, + transpose_18, + reshape_26, + reshape_27, + full_3, + floor_2, + divide_2, + multiply_2, + add_17, + layer_norm_21, + layer_norm_22, + layer_norm_23, + matmul_14, + add_18, + gelu_2, + matmul_15, + add_19, + assign_29, + floor_3, + divide_3, + multiply_3, + add_20, + layer_norm_24, + layer_norm_25, + layer_norm_26, + reshape_28, + roll_2, + transpose_19, + reshape_29, + reshape_30, + matmul_16, + add_21, + transpose_20, + slice_3, + assign_37, + scale_3, + transpose_21, + matmul_17, + reshape_31, + index_select_3, + transpose_22, + unsqueeze_4, + add_22, + reshape_32, + unsqueeze_5, + add_23, + softmax_3, + transpose_23, + reshape_33, + matmul_18, + add_24, + reshape_34, + transpose_24, + reshape_35, + roll_3, + reshape_36, + full_4, + floor_4, + divide_4, + multiply_4, + add_25, + layer_norm_27, + layer_norm_28, + layer_norm_29, + matmul_19, + add_26, + gelu_3, + matmul_20, + add_27, + assign_40, + floor_5, + divide_5, + multiply_5, + add_28, + reshape_37, + strided_slice_4, + strided_slice_5, + strided_slice_6, + strided_slice_7, + assign_53, + concat_1, + reshape_38, + layer_norm_30, + layer_norm_31, + layer_norm_32, + matmul_21, + layer_norm_33, + layer_norm_34, + layer_norm_35, + reshape_39, + transpose_25, + reshape_40, + reshape_41, + matmul_22, + add_29, + transpose_26, + slice_4, + assign_60, + scale_4, + transpose_27, + matmul_23, + reshape_42, + index_select_4, + transpose_28, + unsqueeze_6, + softmax_4, + transpose_29, + reshape_43, + matmul_24, + add_30, + reshape_44, + transpose_30, + reshape_45, + reshape_46, + full_5, + floor_6, + divide_6, + multiply_6, + add_31, + layer_norm_36, + layer_norm_37, + layer_norm_38, + matmul_25, + add_32, + gelu_4, + matmul_26, + add_33, + assign_62, + floor_7, + divide_7, + multiply_7, + add_34, + layer_norm_39, + layer_norm_40, + layer_norm_41, + reshape_47, + roll_4, + transpose_31, + reshape_48, + reshape_49, + matmul_27, + add_35, + transpose_32, + slice_5, + assign_70, + scale_5, + transpose_33, + matmul_28, + reshape_50, + index_select_5, + transpose_34, + unsqueeze_7, + add_36, + reshape_51, + unsqueeze_8, + add_37, + softmax_5, + transpose_35, + reshape_52, + matmul_29, + add_38, + reshape_53, + transpose_36, + reshape_54, + roll_5, + reshape_55, + full_6, + floor_8, + divide_8, + multiply_8, + add_39, + layer_norm_42, + layer_norm_43, + layer_norm_44, + matmul_30, + add_40, + gelu_5, + matmul_31, + add_41, + assign_73, + floor_9, + divide_9, + multiply_9, + add_42, + layer_norm_45, + layer_norm_46, + layer_norm_47, + reshape_56, + transpose_37, + reshape_57, + reshape_58, + matmul_32, + add_43, + transpose_38, + slice_6, + assign_80, + scale_6, + transpose_39, + matmul_33, + reshape_59, + index_select_6, + transpose_40, + unsqueeze_9, + softmax_6, + transpose_41, + reshape_60, + matmul_34, + add_44, + reshape_61, + transpose_42, + reshape_62, + reshape_63, + full_7, + floor_10, + divide_10, + multiply_10, + add_45, + layer_norm_48, + layer_norm_49, + layer_norm_50, + matmul_35, + add_46, + gelu_6, + matmul_36, + add_47, + assign_82, + floor_11, + divide_11, + multiply_11, + add_48, + layer_norm_51, + layer_norm_52, + layer_norm_53, + reshape_64, + roll_6, + transpose_43, + reshape_65, + reshape_66, + matmul_37, + add_49, + transpose_44, + slice_7, + assign_90, + scale_7, + transpose_45, + matmul_38, + reshape_67, + index_select_7, + transpose_46, + unsqueeze_10, + add_50, + reshape_68, + unsqueeze_11, + add_51, + softmax_7, + transpose_47, + reshape_69, + matmul_39, + add_52, + reshape_70, + transpose_48, + reshape_71, + roll_7, + reshape_72, + full_8, + floor_12, + divide_12, + multiply_12, + add_53, + layer_norm_54, + layer_norm_55, + layer_norm_56, + matmul_40, + add_54, + gelu_7, + matmul_41, + add_55, + assign_93, + floor_13, + divide_13, + multiply_13, + add_56, + layer_norm_57, + layer_norm_58, + layer_norm_59, + reshape_73, + transpose_49, + reshape_74, + reshape_75, + matmul_42, + add_57, + transpose_50, + slice_8, + assign_100, + scale_8, + transpose_51, + matmul_43, + reshape_76, + index_select_8, + transpose_52, + unsqueeze_12, + softmax_8, + transpose_53, + reshape_77, + matmul_44, + add_58, + reshape_78, + transpose_54, + reshape_79, + reshape_80, + full_9, + floor_14, + divide_14, + multiply_14, + add_59, + layer_norm_60, + layer_norm_61, + layer_norm_62, + matmul_45, + add_60, + gelu_8, + matmul_46, + add_61, + assign_102, + floor_15, + divide_15, + multiply_15, + add_62, + layer_norm_63, + layer_norm_64, + layer_norm_65, + reshape_81, + roll_8, + transpose_55, + reshape_82, + reshape_83, + matmul_47, + add_63, + transpose_56, + slice_9, + assign_110, + scale_9, + transpose_57, + matmul_48, + reshape_84, + index_select_9, + transpose_58, + unsqueeze_13, + add_64, + reshape_85, + unsqueeze_14, + add_65, + softmax_9, + transpose_59, + reshape_86, + matmul_49, + add_66, + reshape_87, + transpose_60, + reshape_88, + roll_9, + reshape_89, + full_10, + floor_16, + divide_16, + multiply_16, + add_67, + layer_norm_66, + layer_norm_67, + layer_norm_68, + matmul_50, + add_68, + gelu_9, + matmul_51, + add_69, + assign_113, + floor_17, + divide_17, + multiply_17, + add_70, + reshape_90, + strided_slice_8, + strided_slice_9, + strided_slice_10, + strided_slice_11, + assign_126, + concat_2, + reshape_91, + layer_norm_69, + layer_norm_70, + layer_norm_71, + matmul_52, + layer_norm_72, + layer_norm_73, + layer_norm_74, + reshape_92, + transpose_61, + reshape_93, + reshape_94, + matmul_53, + add_71, + transpose_62, + slice_10, + assign_133, + scale_10, + transpose_63, + matmul_54, + reshape_95, + index_select_10, + transpose_64, + unsqueeze_15, + softmax_10, + transpose_65, + reshape_96, + matmul_55, + add_72, + reshape_97, + transpose_66, + reshape_98, + reshape_99, + full_11, + floor_18, + divide_18, + multiply_18, + add_73, + layer_norm_75, + layer_norm_76, + layer_norm_77, + matmul_56, + add_74, + gelu_10, + matmul_57, + add_75, + assign_135, + floor_19, + divide_19, + multiply_19, + add_76, + layer_norm_78, + layer_norm_79, + layer_norm_80, + reshape_100, + roll_10, + transpose_67, + reshape_101, + reshape_102, + matmul_58, + add_77, + transpose_68, + slice_11, + assign_143, + scale_11, + transpose_69, + matmul_59, + reshape_103, + index_select_11, + transpose_70, + unsqueeze_16, + add_78, + reshape_104, + unsqueeze_17, + add_79, + softmax_11, + transpose_71, + reshape_105, + matmul_60, + add_80, + reshape_106, + transpose_72, + reshape_107, + roll_11, + reshape_108, + full_12, + floor_20, + divide_20, + multiply_20, + add_81, + layer_norm_81, + layer_norm_82, + layer_norm_83, + matmul_61, + add_82, + gelu_11, + matmul_62, + add_83, + assign_146, + floor_21, + divide_21, + multiply_21, + add_84, + layer_norm_84, + layer_norm_85, + transpose_73, + unsqueeze_18, + pool2d_0, + squeeze_0, + flatten_0, + matmul_63, + add_85, + set_value__0, + set_value__1, + set_value__2, + set_value__3, + set_value__4, + set_value__5, + ) diff --git a/paddle_samples/vision-model/SwinTransformer_tiny_patch4_window7_224/subgraph_0/weight_meta.py b/paddle_samples/vision-model/SwinTransformer_tiny_patch4_window7_224/subgraph_0/weight_meta.py new file mode 100644 index 00000000..fb09e5cb --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_tiny_patch4_window7_224/subgraph_0/weight_meta.py @@ -0,0 +1,1447 @@ +class Program_weight_tensor_parameter_0: + name = "parameter_0" + shape = [102] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_1: + name = "parameter_1" + shape = [768, 102] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_2: + name = "parameter_2" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_3: + name = "parameter_3" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_4: + name = "parameter_4" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_5: + name = "parameter_5" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_6: + name = "parameter_6" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_7: + name = "parameter_7" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_8: + name = "parameter_8" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_9: + name = "parameter_9" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_10: + name = "parameter_10" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_11: + name = "parameter_11" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_12: + name = "parameter_12" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_13: + name = "parameter_13" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_14: + name = "parameter_14" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_15: + name = "parameter_15" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_16: + name = "parameter_16" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_17: + name = "parameter_17" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_18: + name = "parameter_18" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_19: + name = "parameter_19" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_20: + name = "parameter_20" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_21: + name = "parameter_21" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_22: + name = "parameter_22" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_23: + name = "parameter_23" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_24: + name = "parameter_24" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_25: + name = "parameter_25" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_26: + name = "parameter_26" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_27: + name = "parameter_27" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_28: + name = "parameter_28" + shape = [1536, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_29: + name = "parameter_29" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_30: + name = "parameter_30" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_31: + name = "parameter_31" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_32: + name = "parameter_32" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_33: + name = "parameter_33" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_34: + name = "parameter_34" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_35: + name = "parameter_35" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_36: + name = "parameter_36" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_37: + name = "parameter_37" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_38: + name = "parameter_38" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_39: + name = "parameter_39" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_40: + name = "parameter_40" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_41: + name = "parameter_41" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_42: + name = "parameter_42" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_43: + name = "parameter_43" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_44: + name = "parameter_44" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_45: + name = "parameter_45" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_46: + name = "parameter_46" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_47: + name = "parameter_47" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_48: + name = "parameter_48" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_49: + name = "parameter_49" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_50: + name = "parameter_50" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_51: + name = "parameter_51" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_52: + name = "parameter_52" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_53: + name = "parameter_53" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_54: + name = "parameter_54" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_55: + name = "parameter_55" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_56: + name = "parameter_56" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_57: + name = "parameter_57" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_58: + name = "parameter_58" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_59: + name = "parameter_59" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_60: + name = "parameter_60" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_61: + name = "parameter_61" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_62: + name = "parameter_62" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_63: + name = "parameter_63" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_64: + name = "parameter_64" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_65: + name = "parameter_65" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_66: + name = "parameter_66" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_67: + name = "parameter_67" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_68: + name = "parameter_68" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_69: + name = "parameter_69" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_70: + name = "parameter_70" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_71: + name = "parameter_71" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_72: + name = "parameter_72" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_73: + name = "parameter_73" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_74: + name = "parameter_74" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_75: + name = "parameter_75" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_76: + name = "parameter_76" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_77: + name = "parameter_77" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_78: + name = "parameter_78" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_79: + name = "parameter_79" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_80: + name = "parameter_80" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_81: + name = "parameter_81" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_82: + name = "parameter_82" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_83: + name = "parameter_83" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_84: + name = "parameter_84" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_85: + name = "parameter_85" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_86: + name = "parameter_86" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_87: + name = "parameter_87" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_88: + name = "parameter_88" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_89: + name = "parameter_89" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_90: + name = "parameter_90" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_91: + name = "parameter_91" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_92: + name = "parameter_92" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_93: + name = "parameter_93" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_94: + name = "parameter_94" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_95: + name = "parameter_95" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_96: + name = "parameter_96" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_97: + name = "parameter_97" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_98: + name = "parameter_98" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_99: + name = "parameter_99" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_100: + name = "parameter_100" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_101: + name = "parameter_101" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_102: + name = "parameter_102" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_103: + name = "parameter_103" + shape = [768, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_104: + name = "parameter_104" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_105: + name = "parameter_105" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_106: + name = "parameter_106" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_107: + name = "parameter_107" + shape = [768, 192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_108: + name = "parameter_108" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_109: + name = "parameter_109" + shape = [192, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_110: + name = "parameter_110" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_111: + name = "parameter_111" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_112: + name = "parameter_112" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_113: + name = "parameter_113" + shape = [192, 192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_114: + name = "parameter_114" + shape = [576] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_115: + name = "parameter_115" + shape = [192, 576] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_116: + name = "parameter_116" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_117: + name = "parameter_117" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_118: + name = "parameter_118" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_119: + name = "parameter_119" + shape = [768, 192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_120: + name = "parameter_120" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_121: + name = "parameter_121" + shape = [192, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_122: + name = "parameter_122" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_123: + name = "parameter_123" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_124: + name = "parameter_124" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_125: + name = "parameter_125" + shape = [192, 192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_126: + name = "parameter_126" + shape = [576] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_127: + name = "parameter_127" + shape = [192, 576] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_128: + name = "parameter_128" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_129: + name = "parameter_129" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_130: + name = "parameter_130" + shape = [384, 192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_131: + name = "parameter_131" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_132: + name = "parameter_132" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_133: + name = "parameter_133" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_134: + name = "parameter_134" + shape = [384, 96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_135: + name = "parameter_135" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_136: + name = "parameter_136" + shape = [96, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_137: + name = "parameter_137" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_138: + name = "parameter_138" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_139: + name = "parameter_139" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_140: + name = "parameter_140" + shape = [96, 96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_141: + name = "parameter_141" + shape = [288] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_142: + name = "parameter_142" + shape = [96, 288] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_143: + name = "parameter_143" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_144: + name = "parameter_144" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_145: + name = "parameter_145" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_146: + name = "parameter_146" + shape = [384, 96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_147: + name = "parameter_147" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_148: + name = "parameter_148" + shape = [96, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_149: + name = "parameter_149" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_150: + name = "parameter_150" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_151: + name = "parameter_151" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_152: + name = "parameter_152" + shape = [96, 96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_153: + name = "parameter_153" + shape = [288] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_154: + name = "parameter_154" + shape = [96, 288] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_155: + name = "parameter_155" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_156: + name = "parameter_156" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_157: + name = "parameter_157" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_158: + name = "parameter_158" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_159: + name = "parameter_159" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_160: + name = "parameter_160" + shape = [96, 3, 4, 4] + dtype = "float32" + low = 0 + high = 0.5 + data = None diff --git a/paddle_samples/vision-model/SwinTransformer_tiny_patch4_window7_224/subgraph_1/graph_hash.txt b/paddle_samples/vision-model/SwinTransformer_tiny_patch4_window7_224/subgraph_1/graph_hash.txt new file mode 100644 index 00000000..74dd08e2 --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_tiny_patch4_window7_224/subgraph_1/graph_hash.txt @@ -0,0 +1 @@ +3b849b190c48ef4072dbe90b9d91b07b0dd58fc0a7b7f5f59ae0f85cec51971d \ No newline at end of file diff --git a/paddle_samples/vision-model/SwinTransformer_tiny_patch4_window7_224/subgraph_1/graph_net.json b/paddle_samples/vision-model/SwinTransformer_tiny_patch4_window7_224/subgraph_1/graph_net.json new file mode 100644 index 00000000..368ac6fc --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_tiny_patch4_window7_224/subgraph_1/graph_net.json @@ -0,0 +1,5 @@ +{ + "framework": "paddle", + "num_devices_required": 1, + "num_nodes_required": 1 +} \ No newline at end of file diff --git a/paddle_samples/vision-model/SwinTransformer_tiny_patch4_window7_224/subgraph_1/input_meta.py b/paddle_samples/vision-model/SwinTransformer_tiny_patch4_window7_224/subgraph_1/input_meta.py new file mode 100644 index 00000000..7f13d677 --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_tiny_patch4_window7_224/subgraph_1/input_meta.py @@ -0,0 +1,223 @@ +class Program_weight_tensor_data_0: + name = "data_0" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_1: + name = "data_1" + shape = [169, 6] + dtype = "float32" + low = -6.35888 + high = 7.23665 + data = None + + +class Program_weight_tensor_data_2: + name = "data_2" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_3: + name = "data_3" + shape = [169, 6] + dtype = "float32" + low = -9.10505 + high = 5.17611 + data = None + + +class Program_weight_tensor_data_4: + name = "data_4" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_5: + name = "data_5" + shape = [169, 12] + dtype = "float32" + low = -10.2283 + high = 4.4655 + data = None + + +class Program_weight_tensor_data_6: + name = "data_6" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_7: + name = "data_7" + shape = [169, 12] + dtype = "float32" + low = -10.566 + high = 4.75054 + data = None + + +class Program_weight_tensor_data_8: + name = "data_8" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_9: + name = "data_9" + shape = [169, 12] + dtype = "float32" + low = -11.5154 + high = 3.18723 + data = None + + +class Program_weight_tensor_data_10: + name = "data_10" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_11: + name = "data_11" + shape = [169, 12] + dtype = "float32" + low = -7.51036 + high = 3.69971 + data = None + + +class Program_weight_tensor_data_12: + name = "data_12" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_13: + name = "data_13" + shape = [169, 12] + dtype = "float32" + low = -10.5965 + high = 3.74179 + data = None + + +class Program_weight_tensor_data_14: + name = "data_14" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_15: + name = "data_15" + shape = [169, 12] + dtype = "float32" + low = -13.1403 + high = 3.28153 + data = None + + +class Program_weight_tensor_data_16: + name = "data_16" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_17: + name = "data_17" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_18: + name = "data_18" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_19: + name = "data_19" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_20: + name = "data_20" + shape = [60, 3, 224, 224] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_21: + name = "data_21" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_22: + name = "data_22" + shape = [169, 3] + dtype = "float32" + low = -10.7122 + high = 4.39779 + data = None + + +class Program_weight_tensor_data_23: + name = "data_23" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_24: + name = "data_24" + shape = [169, 3] + dtype = "float32" + low = -8.27887 + high = 3.6693 + data = None diff --git a/paddle_samples/vision-model/SwinTransformer_tiny_patch4_window7_224/subgraph_1/model.py b/paddle_samples/vision-model/SwinTransformer_tiny_patch4_window7_224/subgraph_1/model.py new file mode 100644 index 00000000..aa5d2391 --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_tiny_patch4_window7_224/subgraph_1/model.py @@ -0,0 +1,7360 @@ +import paddle + + +class GraphModule(paddle.nn.Layer): + def __init__(self): + super().__init__() + + def forward( + self, + parameter_0, + parameter_1, + parameter_2, + parameter_3, + parameter_4, + parameter_5, + parameter_6, + parameter_7, + parameter_8, + parameter_9, + parameter_10, + parameter_11, + parameter_12, + parameter_13, + parameter_14, + parameter_15, + parameter_16, + parameter_17, + parameter_18, + parameter_19, + parameter_20, + parameter_21, + parameter_22, + parameter_23, + parameter_24, + parameter_25, + parameter_26, + parameter_27, + parameter_28, + parameter_29, + parameter_30, + parameter_31, + parameter_32, + parameter_33, + parameter_34, + parameter_35, + parameter_36, + parameter_37, + parameter_38, + parameter_39, + parameter_40, + parameter_41, + parameter_42, + parameter_43, + parameter_44, + parameter_45, + parameter_46, + parameter_47, + parameter_48, + parameter_49, + parameter_50, + parameter_51, + parameter_52, + parameter_53, + parameter_54, + parameter_55, + parameter_56, + parameter_57, + parameter_58, + parameter_59, + parameter_60, + parameter_61, + parameter_62, + parameter_63, + parameter_64, + parameter_65, + parameter_66, + parameter_67, + parameter_68, + parameter_69, + parameter_70, + parameter_71, + parameter_72, + parameter_73, + parameter_74, + parameter_75, + parameter_76, + parameter_77, + parameter_78, + parameter_79, + parameter_80, + parameter_81, + parameter_82, + parameter_83, + parameter_84, + parameter_85, + parameter_86, + parameter_87, + parameter_88, + parameter_89, + parameter_90, + parameter_91, + parameter_92, + parameter_93, + parameter_94, + parameter_95, + parameter_96, + parameter_97, + parameter_98, + parameter_99, + parameter_100, + parameter_101, + parameter_102, + parameter_103, + parameter_104, + parameter_105, + parameter_106, + parameter_107, + parameter_108, + parameter_109, + parameter_110, + parameter_111, + parameter_112, + parameter_113, + parameter_114, + parameter_115, + parameter_116, + parameter_117, + parameter_118, + parameter_119, + parameter_120, + parameter_121, + parameter_122, + parameter_123, + parameter_124, + parameter_125, + parameter_126, + parameter_127, + parameter_128, + parameter_129, + parameter_130, + parameter_131, + parameter_132, + parameter_133, + parameter_134, + parameter_135, + parameter_136, + parameter_137, + parameter_138, + parameter_139, + parameter_140, + parameter_141, + parameter_142, + parameter_143, + parameter_144, + parameter_145, + parameter_146, + parameter_147, + parameter_148, + parameter_149, + parameter_150, + parameter_151, + parameter_152, + parameter_153, + parameter_154, + parameter_155, + parameter_156, + parameter_157, + parameter_158, + parameter_159, + parameter_160, + data_0, + data_1, + data_2, + data_3, + data_4, + data_5, + data_6, + data_7, + data_8, + data_9, + data_10, + data_11, + data_12, + data_13, + data_14, + data_15, + data_16, + data_17, + data_18, + data_19, + data_20, + data_21, + data_22, + data_23, + data_24, + ): + # pd_op.shape64: (4xi64) <- (-1x3x224x224xf32) + shape64_0 = paddle._C_ops.shape64(data_20) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_7 = [0] + + # pd_op.assign: (1xi64) <- (1xi64) + assign_146 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_139 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_136 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_129 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_113 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_106 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_103 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_96 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_93 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_86 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_83 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_76 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_73 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_66 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_63 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_56 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_40 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_33 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_30 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_23 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_12 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_5 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_4 = full_int_array_7 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_0 = full_int_array_7 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_8 = [1] + + # pd_op.assign: (1xi64) <- (1xi64) + assign_141 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_140 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_131 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_130 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_108 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_107 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_98 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_97 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_88 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_87 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_78 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_77 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_68 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_67 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_58 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_57 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_35 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_34 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_25 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_24 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_7 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_6 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_2 = full_int_array_8 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_1 = full_int_array_8 + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_12 = paddle._C_ops.slice( + shape64_0, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_0 + + # pd_op.conv2d: (-1x96x56x56xf32) <- (-1x3x224x224xf32, 96x3x4x4xf32) + conv2d_0 = paddle._C_ops.conv2d( + data_20, parameter_160, [4, 4], [0, 0], "EXPLICIT", [1, 1], 1, "NCHW" + ) + del data_20, parameter_160 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_9 = [1, -1, 1, 1] + + # pd_op.reshape: (1x96x1x1xf32) <- (96xf32, 4xi64) + reshape_0 = paddle._C_ops.reshape(parameter_159, full_int_array_9) + del full_int_array_9, parameter_159 + + # pd_op.add: (-1x96x56x56xf32) <- (-1x96x56x56xf32, 1x96x1x1xf32) + add_0 = paddle._C_ops.add(conv2d_0, reshape_0) + + # pd_op.shape64: (4xi64) <- (-1x96x56x56xf32) + shape64_1 = paddle._C_ops.shape64(add_0) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_13 = paddle._C_ops.slice( + shape64_1, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_1 + + # pd_op.flatten: (-1x96x3136xf32) <- (-1x96x56x56xf32) + flatten_1 = paddle._C_ops.flatten(add_0, 2, 3) + + # pd_op.transpose: (-1x3136x96xf32) <- (-1x96x3136xf32) + transpose_0 = paddle._C_ops.transpose(flatten_1, [0, 2, 1]) + del flatten_1 + + # pd_op.layer_norm: (-1x3136x96xf32, -1x3136xf32, -1x3136xf32) <- (-1x3136x96xf32, 96xf32, 96xf32) + layer_norm_0, layer_norm_1, layer_norm_2 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + transpose_0, parameter_158, parameter_157, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_157, parameter_158 + + # pd_op.shape64: (3xi64) <- (-1x3136x96xf32) + shape64_2 = paddle._C_ops.shape64(layer_norm_0) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_14 = paddle._C_ops.slice( + shape64_2, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_2 + + # pd_op.layer_norm: (-1x3136x96xf32, -1x3136xf32, -1x3136xf32) <- (-1x3136x96xf32, 96xf32, 96xf32) + layer_norm_3, layer_norm_4, layer_norm_5 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + layer_norm_0, parameter_156, parameter_155, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_155, parameter_156 + + # pd_op.full: (xi64) <- () + full_13 = paddle._C_ops.full( + [], float("56"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_14 = paddle._C_ops.full( + [], float("96"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_0 = [slice_14, full_13, full_13, full_14] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_0 = paddle._C_ops.stack(combine_0, 0) + del combine_0 + + # pd_op.reshape: (-1x56x56x96xf32) <- (-1x3136x96xf32, 4xi64) + reshape_1 = paddle._C_ops.reshape(layer_norm_3, stack_0) + del stack_0 + + # pd_op.shape64: (4xi64) <- (-1x56x56x96xf32) + shape64_3 = paddle._C_ops.shape64(reshape_1) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_15 = paddle._C_ops.slice( + shape64_3, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_3 + + # pd_op.full: (xi64) <- () + full_15 = paddle._C_ops.full( + [], float("8"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_16 = paddle._C_ops.full( + [], float("7"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_1 = [slice_15, full_15, full_16, full_15, full_16, full_14] + del slice_15 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_1 = paddle._C_ops.stack(combine_1, 0) + del combine_1 + + # pd_op.reshape: (-1x8x7x8x7x96xf32) <- (-1x56x56x96xf32, 6xi64) + reshape_109 = paddle._C_ops.reshape(reshape_1, stack_1) + del stack_1 + + # pd_op.transpose: (-1x8x8x7x7x96xf32) <- (-1x8x7x8x7x96xf32) + transpose_1 = paddle._C_ops.transpose(reshape_109, [0, 1, 3, 2, 4, 5]) + del reshape_109 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_10 = [-1, 7, 7, 96] + + # pd_op.reshape: (-1x7x7x96xf32) <- (-1x8x8x7x7x96xf32, 4xi64) + reshape_2 = paddle._C_ops.reshape(transpose_1, full_int_array_10) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_11 = [-1, 49, 96] + + # pd_op.reshape: (-1x49x96xf32) <- (-1x7x7x96xf32, 3xi64) + reshape_3 = paddle._C_ops.reshape(reshape_2, full_int_array_11) + + # pd_op.shape64: (3xi64) <- (-1x49x96xf32) + shape64_4 = paddle._C_ops.shape64(reshape_3) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_16 = paddle._C_ops.slice( + shape64_4, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_4 + + # pd_op.matmul: (-1x49x288xf32) <- (-1x49x96xf32, 96x288xf32) + matmul_0 = paddle._C_ops.matmul(reshape_3, parameter_154, False, False) + del parameter_154 + + # pd_op.add: (-1x49x288xf32) <- (-1x49x288xf32, 288xf32) + add_1 = paddle._C_ops.add(matmul_0, parameter_153) + del parameter_153 + + # pd_op.full: (xi64) <- () + full_17 = paddle._C_ops.full( + [], float("49"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_18 = paddle._C_ops.full( + [], float("3"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_19 = paddle._C_ops.full( + [], float("32"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_2 = [slice_16, full_17, full_18, full_18, full_19] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_2 = paddle._C_ops.stack(combine_2, 0) + del combine_2 + + # pd_op.reshape: (-1x49x3x3x32xf32) <- (-1x49x288xf32, 5xi64) + reshape_110 = paddle._C_ops.reshape(add_1, stack_2) + del stack_2 + + # pd_op.transpose: (3x-1x3x49x32xf32) <- (-1x49x3x3x32xf32) + transpose_2 = paddle._C_ops.transpose(reshape_110, [2, 0, 3, 1, 4]) + del reshape_110 + + # pd_op.slice: (-1x3x49x32xf32) <- (3x-1x3x49x32xf32, 1xi64, 1xi64) + slice_17 = paddle._C_ops.slice( + transpose_2, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_0 = [2] + + # pd_op.assign: (1xi64) <- (1xi64) + assign_151 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_149 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_143 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_142 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_133 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_132 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_110 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_109 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_100 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_99 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_90 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_89 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_80 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_79 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_70 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_69 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_60 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_59 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_37 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_36 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_27 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_26 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_9 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_8 = full_int_array_0 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_3 = full_int_array_0 + + # pd_op.slice: (-1x3x49x32xf32) <- (3x-1x3x49x32xf32, 1xi64, 1xi64) + slice_18 = paddle._C_ops.slice( + transpose_2, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_1 = [3] + + # pd_op.assign: (1xi64) <- (1xi64) + assign_144 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_134 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_111 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_101 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_91 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_81 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_71 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_61 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_38 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_28 = full_int_array_1 + + # pd_op.assign: (1xi64) <- (1xi64) + assign_10 = full_int_array_1 + + # pd_op.slice: (-1x3x49x32xf32) <- (3x-1x3x49x32xf32, 1xi64, 1xi64) + slice_0 = paddle._C_ops.slice( + transpose_2, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.full: (1xf32) <- () + full_0 = paddle._C_ops.full( + [1], float("0.176777"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.assign: (1xf32) <- (1xf32) + assign_145 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_135 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_112 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_102 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_92 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_82 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_72 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_62 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_39 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_29 = full_0 + + # pd_op.assign: (1xf32) <- (1xf32) + assign_11 = full_0 + + # pd_op.scale: (-1x3x49x32xf32) <- (-1x3x49x32xf32, 1xf32) + scale_0 = paddle._C_ops.scale(slice_17, full_0, float("0"), True) + del slice_17 + + # pd_op.transpose: (-1x3x32x49xf32) <- (-1x3x49x32xf32) + transpose_3 = paddle._C_ops.transpose(slice_18, [0, 1, 3, 2]) + del slice_18 + + # pd_op.matmul: (-1x3x49x49xf32) <- (-1x3x49x32xf32, -1x3x32x49xf32) + matmul_1 = paddle._C_ops.matmul(scale_0, transpose_3, False, False) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_12 = [-1] + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_4 = paddle._C_ops.reshape(data_21, full_int_array_12) + del data_21 + + # pd_op.index_select: (2401x3xf32) <- (169x3xf32, 2401xi64) + index_select_0 = paddle._C_ops.index_select(data_22, reshape_4, 0) + del data_22 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_13 = [49, 49, -1] + + # pd_op.reshape: (49x49x3xf32) <- (2401x3xf32, 3xi64) + reshape_111 = paddle._C_ops.reshape(index_select_0, full_int_array_13) + + # pd_op.transpose: (3x49x49xf32) <- (49x49x3xf32) + transpose_4 = paddle._C_ops.transpose(reshape_111, [2, 0, 1]) + del reshape_111 + + # pd_op.unsqueeze: (1x3x49x49xf32) <- (3x49x49xf32, 1xi64) + unsqueeze_0 = paddle._C_ops.unsqueeze(transpose_4, full_int_array_7) + + # pd_op.add: (-1x3x49x49xf32) <- (-1x3x49x49xf32, 1x3x49x49xf32) + add_86 = paddle._C_ops.add(matmul_1, unsqueeze_0) + + # pd_op.softmax: (-1x3x49x49xf32) <- (-1x3x49x49xf32) + softmax_0 = paddle._C_ops.softmax(add_86, -1) + del add_86 + + # pd_op.matmul: (-1x3x49x32xf32) <- (-1x3x49x49xf32, -1x3x49x32xf32) + matmul_64 = paddle._C_ops.matmul(softmax_0, slice_0, False, False) + + # pd_op.transpose: (-1x49x3x32xf32) <- (-1x3x49x32xf32) + transpose_5 = paddle._C_ops.transpose(matmul_64, [0, 2, 1, 3]) + del matmul_64 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_3 = [slice_16, full_17, full_14] + del slice_16 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_3 = paddle._C_ops.stack(combine_3, 0) + del combine_3 + + # pd_op.reshape: (-1x49x96xf32) <- (-1x49x3x32xf32, 3xi64) + reshape_5 = paddle._C_ops.reshape(transpose_5, stack_3) + del stack_3 + + # pd_op.matmul: (-1x49x96xf32) <- (-1x49x96xf32, 96x96xf32) + matmul_2 = paddle._C_ops.matmul(reshape_5, parameter_152, False, False) + del parameter_152 + + # pd_op.add: (-1x49x96xf32) <- (-1x49x96xf32, 96xf32) + add_2 = paddle._C_ops.add(matmul_2, parameter_151) + del parameter_151 + + # pd_op.reshape: (-1x7x7x96xf32) <- (-1x49x96xf32, 4xi64) + reshape_6 = paddle._C_ops.reshape(add_2, full_int_array_10) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_14 = [-1, 8, 8, 7, 7, 96] + + # pd_op.reshape: (-1x8x8x7x7x96xf32) <- (-1x7x7x96xf32, 6xi64) + reshape_112 = paddle._C_ops.reshape(reshape_6, full_int_array_14) + + # pd_op.transpose: (-1x8x7x8x7x96xf32) <- (-1x8x8x7x7x96xf32) + transpose_6 = paddle._C_ops.transpose(reshape_112, [0, 1, 3, 2, 4, 5]) + del reshape_112 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_15 = [-1, 56, 56, 96] + + # pd_op.reshape: (-1x56x56x96xf32) <- (-1x8x7x8x7x96xf32, 4xi64) + reshape_7 = paddle._C_ops.reshape(transpose_6, full_int_array_15) + + # pd_op.full: (xi64) <- () + full_20 = paddle._C_ops.full( + [], float("3136"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_4 = [slice_14, full_20, full_14] + del slice_14 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_4 = paddle._C_ops.stack(combine_4, 0) + del combine_4 + + # pd_op.reshape: (-1x3136x96xf32) <- (-1x56x56x96xf32, 3xi64) + reshape_8 = paddle._C_ops.reshape(reshape_7, stack_4) + del stack_4 + + # pd_op.add: (-1x3136x96xf32) <- (-1x3136x96xf32, -1x3136x96xf32) + add_3 = paddle._C_ops.add(layer_norm_0, reshape_8) + + # pd_op.layer_norm: (-1x3136x96xf32, -1x3136xf32, -1x3136xf32) <- (-1x3136x96xf32, 96xf32, 96xf32) + layer_norm_6, layer_norm_7, layer_norm_8 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_3, parameter_150, parameter_149, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_149, parameter_150 + + # pd_op.matmul: (-1x3136x384xf32) <- (-1x3136x96xf32, 96x384xf32) + matmul_3 = paddle._C_ops.matmul(layer_norm_6, parameter_148, False, False) + del parameter_148 + + # pd_op.add: (-1x3136x384xf32) <- (-1x3136x384xf32, 384xf32) + add_4 = paddle._C_ops.add(matmul_3, parameter_147) + del parameter_147 + + # pd_op.gelu: (-1x3136x384xf32) <- (-1x3136x384xf32) + gelu_0 = paddle._C_ops.gelu(add_4, False) + + # pd_op.matmul: (-1x3136x96xf32) <- (-1x3136x384xf32, 384x96xf32) + matmul_4 = paddle._C_ops.matmul(gelu_0, parameter_146, False, False) + del parameter_146 + + # pd_op.add: (-1x3136x96xf32) <- (-1x3136x96xf32, 96xf32) + add_5 = paddle._C_ops.add(matmul_4, parameter_145) + del parameter_145 + + # pd_op.add: (-1x3136x96xf32) <- (-1x3136x96xf32, -1x3136x96xf32) + add_6 = paddle._C_ops.add(add_3, add_5) + + # pd_op.shape64: (3xi64) <- (-1x3136x96xf32) + shape64_5 = paddle._C_ops.shape64(add_6) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_19 = paddle._C_ops.slice( + shape64_5, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_5 + + # pd_op.layer_norm: (-1x3136x96xf32, -1x3136xf32, -1x3136xf32) <- (-1x3136x96xf32, 96xf32, 96xf32) + layer_norm_9, layer_norm_10, layer_norm_11 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_6, parameter_144, parameter_143, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_143, parameter_144 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_5 = [slice_19, full_13, full_13, full_14] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_5 = paddle._C_ops.stack(combine_5, 0) + del combine_5 + + # pd_op.reshape: (-1x56x56x96xf32) <- (-1x3136x96xf32, 4xi64) + reshape_9 = paddle._C_ops.reshape(layer_norm_9, stack_5) + del stack_5 + + # pd_op.shape64: (4xi64) <- (-1x56x56x96xf32) + shape64_6 = paddle._C_ops.shape64(reshape_9) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_20 = paddle._C_ops.slice( + shape64_6, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_6 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_2 = [-3, -3] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_138 = full_int_array_2 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_105 = full_int_array_2 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_85 = full_int_array_2 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_65 = full_int_array_2 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_32 = full_int_array_2 + + # pd_op.roll: (-1x56x56x96xf32) <- (-1x56x56x96xf32, 2xi64) + roll_0 = paddle._C_ops.roll(reshape_9, full_int_array_2, [1, 2]) + + # pd_op.shape64: (4xi64) <- (-1x56x56x96xf32) + shape64_7 = paddle._C_ops.shape64(roll_0) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_21 = paddle._C_ops.slice( + shape64_7, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_7 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_6 = [slice_21, full_15, full_16, full_15, full_16, full_14] + del full_15, slice_21 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_6 = paddle._C_ops.stack(combine_6, 0) + del combine_6 + + # pd_op.reshape: (-1x8x7x8x7x96xf32) <- (-1x56x56x96xf32, 6xi64) + reshape_113 = paddle._C_ops.reshape(roll_0, stack_6) + del stack_6 + + # pd_op.transpose: (-1x8x8x7x7x96xf32) <- (-1x8x7x8x7x96xf32) + transpose_7 = paddle._C_ops.transpose(reshape_113, [0, 1, 3, 2, 4, 5]) + del reshape_113 + + # pd_op.reshape: (-1x7x7x96xf32) <- (-1x8x8x7x7x96xf32, 4xi64) + reshape_10 = paddle._C_ops.reshape(transpose_7, full_int_array_10) + + # pd_op.reshape: (-1x49x96xf32) <- (-1x7x7x96xf32, 3xi64) + reshape_11 = paddle._C_ops.reshape(reshape_10, full_int_array_11) + del full_int_array_11 + + # pd_op.full: (1x56x56x1xf32) <- () + full_21 = paddle._C_ops.full( + [1, 56, 56, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_16 = [0, 0] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_116 = full_int_array_16 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_43 = full_int_array_16 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_14 = full_int_array_16 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_17 = [-7, -7] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_18 = [1, 1] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_150 = full_int_array_18 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_125 = full_int_array_18 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_52 = full_int_array_18 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_20 = full_int_array_18 + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__6 = paddle._C_ops.set_value_( + full_21, + full_int_array_16, + full_int_array_17, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_21 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_19 = [0, -7] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_20 = [-7, -3] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__7 = paddle._C_ops.set_value_( + set_value__6, + full_int_array_19, + full_int_array_20, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__6 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_21 = [0, -3] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_22 = [-7, 2147483647] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__8 = paddle._C_ops.set_value_( + set_value__7, + full_int_array_21, + full_int_array_22, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__7 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_23 = [-7, 0] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_24 = [-3, -7] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__9 = paddle._C_ops.set_value_( + set_value__8, + full_int_array_23, + full_int_array_24, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__8 + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__10 = paddle._C_ops.set_value_( + set_value__9, + full_int_array_17, + full_int_array_2, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__9 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_25 = [-3, 2147483647] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__11 = paddle._C_ops.set_value_( + set_value__10, + full_int_array_20, + full_int_array_25, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__10 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_26 = [-3, 0] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_27 = [2147483647, -7] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__12 = paddle._C_ops.set_value_( + set_value__11, + full_int_array_26, + full_int_array_27, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__11 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_28 = [2147483647, -3] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__13 = paddle._C_ops.set_value_( + set_value__12, + full_int_array_24, + full_int_array_28, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__12 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_29 = [2147483647, 2147483647] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_126 = full_int_array_29 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_123 = full_int_array_29 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_120 = full_int_array_29 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_117 = full_int_array_29 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_53 = full_int_array_29 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_50 = full_int_array_29 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_47 = full_int_array_29 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_44 = full_int_array_29 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_21 = full_int_array_29 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_18 = full_int_array_29 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_16 = full_int_array_29 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_15 = full_int_array_29 + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__0 = paddle._C_ops.set_value_( + set_value__13, + full_int_array_2, + full_int_array_29, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__13 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_30 = [1, 8, 7, 8, 7, 1] + + # pd_op.reshape: (1x8x7x8x7x1xf32) <- (1x56x56x1xf32, 6xi64) + reshape_114 = paddle._C_ops.reshape(set_value__0, full_int_array_30) + del full_int_array_30 + + # pd_op.transpose: (1x8x8x7x7x1xf32) <- (1x8x7x8x7x1xf32) + transpose_74 = paddle._C_ops.transpose(reshape_114, [0, 1, 3, 2, 4, 5]) + del reshape_114 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_31 = [-1, 7, 7, 1] + + # pd_op.reshape: (64x7x7x1xf32) <- (1x8x8x7x7x1xf32, 4xi64) + reshape_115 = paddle._C_ops.reshape(transpose_74, full_int_array_31) + del transpose_74 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_32 = [-1, 49] + + # pd_op.reshape: (64x49xf32) <- (64x7x7x1xf32, 2xi64) + reshape_116 = paddle._C_ops.reshape(reshape_115, full_int_array_32) + del reshape_115 + + # pd_op.unsqueeze: (64x1x49xf32) <- (64x49xf32, 1xi64) + unsqueeze_19 = paddle._C_ops.unsqueeze(reshape_116, full_int_array_8) + + # pd_op.unsqueeze: (64x49x1xf32) <- (64x49xf32, 1xi64) + unsqueeze_20 = paddle._C_ops.unsqueeze(reshape_116, full_int_array_0) + del reshape_116 + + # pd_op.subtract: (64x49x49xf32) <- (64x1x49xf32, 64x49x1xf32) + subtract_0 = paddle._C_ops.subtract(unsqueeze_19, unsqueeze_20) + del unsqueeze_19, unsqueeze_20 + + # pd_op.full: (xf32) <- () + full_22 = paddle._C_ops.full( + [], float("0"), paddle.float32, paddle.framework._current_expected_place() + ) + + # pd_op.not_equal: (64x49x49xb) <- (64x49x49xf32, xf32) + not_equal_0 = paddle._C_ops.not_equal(subtract_0, full_22) + + # pd_op.full: (64x49x49xf32) <- () + full_23 = paddle._C_ops.full( + [64, 49, 49], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (64x49x49xf32) <- (64x49x49xb, 64x49x49xf32, 64x49x49xf32) + where_0 = paddle._C_ops.where(not_equal_0, full_23, subtract_0) + del full_23, not_equal_0, subtract_0 + + # pd_op.equal: (64x49x49xb) <- (64x49x49xf32, xf32) + equal_0 = paddle._C_ops.equal(where_0, full_22) + + # pd_op.full: (64x49x49xf32) <- () + full_24 = paddle._C_ops.full( + [64, 49, 49], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (64x49x49xf32) <- (64x49x49xb, 64x49x49xf32, 64x49x49xf32) + where_1 = paddle._C_ops.where(equal_0, full_24, where_0) + del equal_0, full_24, where_0 + + # pd_op.shape64: (3xi64) <- (-1x49x96xf32) + shape64_8 = paddle._C_ops.shape64(reshape_11) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_22 = paddle._C_ops.slice( + shape64_8, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_8 + + # pd_op.matmul: (-1x49x288xf32) <- (-1x49x96xf32, 96x288xf32) + matmul_5 = paddle._C_ops.matmul(reshape_11, parameter_142, False, False) + del parameter_142 + + # pd_op.add: (-1x49x288xf32) <- (-1x49x288xf32, 288xf32) + add_7 = paddle._C_ops.add(matmul_5, parameter_141) + del parameter_141 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_7 = [slice_22, full_17, full_18, full_18, full_19] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_7 = paddle._C_ops.stack(combine_7, 0) + del combine_7 + + # pd_op.reshape: (-1x49x3x3x32xf32) <- (-1x49x288xf32, 5xi64) + reshape_117 = paddle._C_ops.reshape(add_7, stack_7) + del stack_7 + + # pd_op.transpose: (3x-1x3x49x32xf32) <- (-1x49x3x3x32xf32) + transpose_8 = paddle._C_ops.transpose(reshape_117, [2, 0, 3, 1, 4]) + del reshape_117 + + # pd_op.slice: (-1x3x49x32xf32) <- (3x-1x3x49x32xf32, 1xi64, 1xi64) + slice_23 = paddle._C_ops.slice( + transpose_8, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x3x49x32xf32) <- (3x-1x3x49x32xf32, 1xi64, 1xi64) + slice_24 = paddle._C_ops.slice( + transpose_8, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x3x49x32xf32) <- (3x-1x3x49x32xf32, 1xi64, 1xi64) + slice_1 = paddle._C_ops.slice( + transpose_8, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x3x49x32xf32) <- (-1x3x49x32xf32, 1xf32) + scale_1 = paddle._C_ops.scale(slice_23, full_0, float("0"), True) + del slice_23 + + # pd_op.transpose: (-1x3x32x49xf32) <- (-1x3x49x32xf32) + transpose_9 = paddle._C_ops.transpose(slice_24, [0, 1, 3, 2]) + del slice_24 + + # pd_op.matmul: (-1x3x49x49xf32) <- (-1x3x49x32xf32, -1x3x32x49xf32) + matmul_6 = paddle._C_ops.matmul(scale_1, transpose_9, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_12 = paddle._C_ops.reshape(data_23, full_int_array_12) + del data_23 + + # pd_op.index_select: (2401x3xf32) <- (169x3xf32, 2401xi64) + index_select_1 = paddle._C_ops.index_select(data_24, reshape_12, 0) + del data_24 + + # pd_op.reshape: (49x49x3xf32) <- (2401x3xf32, 3xi64) + reshape_118 = paddle._C_ops.reshape(index_select_1, full_int_array_13) + + # pd_op.transpose: (3x49x49xf32) <- (49x49x3xf32) + transpose_10 = paddle._C_ops.transpose(reshape_118, [2, 0, 1]) + del reshape_118 + + # pd_op.unsqueeze: (1x3x49x49xf32) <- (3x49x49xf32, 1xi64) + unsqueeze_1 = paddle._C_ops.unsqueeze(transpose_10, full_int_array_7) + + # pd_op.add: (-1x3x49x49xf32) <- (-1x3x49x49xf32, 1x3x49x49xf32) + add_8 = paddle._C_ops.add(matmul_6, unsqueeze_1) + + # pd_op.full: (xi64) <- () + full_25 = paddle._C_ops.full( + [], float("64"), paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_0 = paddle._C_ops.floor_divide(slice_22, full_25) + del full_25 + + # pd_op.full: (xi64) <- () + full_26 = paddle._C_ops.full( + [], float("64"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_8 = [floor_divide_0, full_26, full_18, full_17, full_17] + del floor_divide_0, full_26 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_8 = paddle._C_ops.stack(combine_8, 0) + del combine_8 + + # pd_op.reshape: (-1x64x3x49x49xf32) <- (-1x3x49x49xf32, 5xi64) + reshape_13 = paddle._C_ops.reshape(add_8, stack_8) + del stack_8 + + # pd_op.unsqueeze: (64x1x49x49xf32) <- (64x49x49xf32, 1xi64) + unsqueeze_21 = paddle._C_ops.unsqueeze(where_1, full_int_array_8) + del where_1 + + # pd_op.unsqueeze: (1x64x1x49x49xf32) <- (64x1x49x49xf32, 1xi64) + unsqueeze_2 = paddle._C_ops.unsqueeze(unsqueeze_21, full_int_array_7) + del unsqueeze_21 + + # pd_op.add: (-1x64x3x49x49xf32) <- (-1x64x3x49x49xf32, 1x64x1x49x49xf32) + add_9 = paddle._C_ops.add(reshape_13, unsqueeze_2) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_9 = [slice_22, full_18, full_17, full_17] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_9 = paddle._C_ops.stack(combine_9, 0) + del combine_9 + + # pd_op.reshape: (-1x3x49x49xf32) <- (-1x64x3x49x49xf32, 4xi64) + reshape_119 = paddle._C_ops.reshape(add_9, stack_9) + del stack_9 + + # pd_op.softmax: (-1x3x49x49xf32) <- (-1x3x49x49xf32) + softmax_1 = paddle._C_ops.softmax(reshape_119, -1) + del reshape_119 + + # pd_op.matmul: (-1x3x49x32xf32) <- (-1x3x49x49xf32, -1x3x49x32xf32) + matmul_65 = paddle._C_ops.matmul(softmax_1, slice_1, False, False) + + # pd_op.transpose: (-1x49x3x32xf32) <- (-1x3x49x32xf32) + transpose_11 = paddle._C_ops.transpose(matmul_65, [0, 2, 1, 3]) + del matmul_65 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_10 = [slice_22, full_17, full_14] + del slice_22 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_10 = paddle._C_ops.stack(combine_10, 0) + del combine_10 + + # pd_op.reshape: (-1x49x96xf32) <- (-1x49x3x32xf32, 3xi64) + reshape_14 = paddle._C_ops.reshape(transpose_11, stack_10) + del stack_10 + + # pd_op.matmul: (-1x49x96xf32) <- (-1x49x96xf32, 96x96xf32) + matmul_7 = paddle._C_ops.matmul(reshape_14, parameter_140, False, False) + del parameter_140 + + # pd_op.add: (-1x49x96xf32) <- (-1x49x96xf32, 96xf32) + add_10 = paddle._C_ops.add(matmul_7, parameter_139) + del parameter_139 + + # pd_op.reshape: (-1x7x7x96xf32) <- (-1x49x96xf32, 4xi64) + reshape_15 = paddle._C_ops.reshape(add_10, full_int_array_10) + del full_int_array_10 + + # pd_op.reshape: (-1x8x8x7x7x96xf32) <- (-1x7x7x96xf32, 6xi64) + reshape_120 = paddle._C_ops.reshape(reshape_15, full_int_array_14) + del full_int_array_14 + + # pd_op.transpose: (-1x8x7x8x7x96xf32) <- (-1x8x8x7x7x96xf32) + transpose_12 = paddle._C_ops.transpose(reshape_120, [0, 1, 3, 2, 4, 5]) + del reshape_120 + + # pd_op.reshape: (-1x56x56x96xf32) <- (-1x8x7x8x7x96xf32, 4xi64) + reshape_16 = paddle._C_ops.reshape(transpose_12, full_int_array_15) + del full_int_array_15 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_3 = [3, 3] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_147 = full_int_array_3 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_114 = full_int_array_3 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_94 = full_int_array_3 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_74 = full_int_array_3 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_41 = full_int_array_3 + + # pd_op.roll: (-1x56x56x96xf32) <- (-1x56x56x96xf32, 2xi64) + roll_1 = paddle._C_ops.roll(reshape_16, full_int_array_3, [1, 2]) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_11 = [slice_19, full_20, full_14] + del full_20, slice_19 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_11 = paddle._C_ops.stack(combine_11, 0) + del combine_11 + + # pd_op.reshape: (-1x3136x96xf32) <- (-1x56x56x96xf32, 3xi64) + reshape_17 = paddle._C_ops.reshape(roll_1, stack_11) + del stack_11 + + # pd_op.full: (xf32) <- () + full_1 = paddle._C_ops.full( + [], + float("0.981818"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_13 = full_1 + + # pd_op.shape64: (3xi64) <- (-1x3136x96xf32) + shape64_9 = paddle._C_ops.shape64(reshape_17) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_25 = paddle._C_ops.slice( + shape64_9, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_9 + + # pd_op.full: (xi64) <- () + full_27 = paddle._C_ops.full( + [], float("1"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_12 = [slice_25, full_27, full_27] + del slice_25 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_12 = paddle._C_ops.stack(combine_12, 0) + del combine_12 + + # pd_op.full: (1xf32) <- () + full_28 = paddle._C_ops.full( + [1], float("0"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.full: (1xf32) <- () + full_29 = paddle._C_ops.full( + [1], float("1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_0 = paddle._C_ops.uniform( + stack_12, + paddle.float32, + full_28, + full_29, + 0, + paddle.framework._current_expected_place(), + ) + del stack_12 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_87 = paddle._C_ops.add(full_1, uniform_0) + del uniform_0 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_0 = paddle._C_ops.floor(add_87) + del add_87 + + # pd_op.divide: (-1x3136x96xf32) <- (-1x3136x96xf32, xf32) + divide_0 = paddle._C_ops.divide(reshape_17, full_1) + + # pd_op.multiply: (-1x3136x96xf32) <- (-1x3136x96xf32, -1x1x1xf32) + multiply_0 = paddle._C_ops.multiply(divide_0, floor_0) + + # pd_op.add: (-1x3136x96xf32) <- (-1x3136x96xf32, -1x3136x96xf32) + add_11 = paddle._C_ops.add(add_6, multiply_0) + + # pd_op.layer_norm: (-1x3136x96xf32, -1x3136xf32, -1x3136xf32) <- (-1x3136x96xf32, 96xf32, 96xf32) + layer_norm_12, layer_norm_13, layer_norm_14 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_11, parameter_138, parameter_137, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_137, parameter_138 + + # pd_op.matmul: (-1x3136x384xf32) <- (-1x3136x96xf32, 96x384xf32) + matmul_8 = paddle._C_ops.matmul(layer_norm_12, parameter_136, False, False) + del parameter_136 + + # pd_op.add: (-1x3136x384xf32) <- (-1x3136x384xf32, 384xf32) + add_12 = paddle._C_ops.add(matmul_8, parameter_135) + del parameter_135 + + # pd_op.gelu: (-1x3136x384xf32) <- (-1x3136x384xf32) + gelu_1 = paddle._C_ops.gelu(add_12, False) + + # pd_op.matmul: (-1x3136x96xf32) <- (-1x3136x384xf32, 384x96xf32) + matmul_9 = paddle._C_ops.matmul(gelu_1, parameter_134, False, False) + del parameter_134 + + # pd_op.add: (-1x3136x96xf32) <- (-1x3136x96xf32, 96xf32) + add_13 = paddle._C_ops.add(matmul_9, parameter_133) + del parameter_133 + + # pd_op.shape64: (3xi64) <- (-1x3136x96xf32) + shape64_10 = paddle._C_ops.shape64(add_13) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_26 = paddle._C_ops.slice( + shape64_10, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_10 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_13 = [slice_26, full_27, full_27] + del slice_26 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_13 = paddle._C_ops.stack(combine_13, 0) + del combine_13 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_1 = paddle._C_ops.uniform( + stack_13, + paddle.float32, + full_28, + full_29, + 0, + paddle.framework._current_expected_place(), + ) + del stack_13 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_88 = paddle._C_ops.add(full_1, uniform_1) + del uniform_1 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_1 = paddle._C_ops.floor(add_88) + del add_88 + + # pd_op.divide: (-1x3136x96xf32) <- (-1x3136x96xf32, xf32) + divide_1 = paddle._C_ops.divide(add_13, full_1) + + # pd_op.multiply: (-1x3136x96xf32) <- (-1x3136x96xf32, -1x1x1xf32) + multiply_1 = paddle._C_ops.multiply(divide_1, floor_1) + + # pd_op.add: (-1x3136x96xf32) <- (-1x3136x96xf32, -1x3136x96xf32) + add_14 = paddle._C_ops.add(add_11, multiply_1) + + # pd_op.shape64: (3xi64) <- (-1x3136x96xf32) + shape64_11 = paddle._C_ops.shape64(add_14) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_27 = paddle._C_ops.slice( + shape64_11, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_11 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_14 = [slice_27, full_13, full_13, full_14] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_14 = paddle._C_ops.stack(combine_14, 0) + del combine_14 + + # pd_op.reshape: (-1x56x56x96xf32) <- (-1x3136x96xf32, 4xi64) + reshape_18 = paddle._C_ops.reshape(add_14, stack_14) + del stack_14 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_4 = [2, 2] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_127 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_124 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_121 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_118 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_54 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_51 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_48 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_45 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_22 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_19 = full_int_array_4 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_17 = full_int_array_4 + + # pd_op.strided_slice: (-1x28x28x96xf32) <- (-1x56x56x96xf32, 2xi64, 2xi64, 2xi64) + strided_slice_0 = paddle._C_ops.strided_slice( + reshape_18, [1, 2], full_int_array_16, full_int_array_29, full_int_array_4 + ) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_5 = [1, 0] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_119 = full_int_array_5 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_46 = full_int_array_5 + + # pd_op.strided_slice: (-1x28x28x96xf32) <- (-1x56x56x96xf32, 2xi64, 2xi64, 2xi64) + strided_slice_1 = paddle._C_ops.strided_slice( + reshape_18, [1, 2], full_int_array_5, full_int_array_29, full_int_array_4 + ) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_6 = [0, 1] + + # pd_op.assign: (2xi64) <- (2xi64) + assign_122 = full_int_array_6 + + # pd_op.assign: (2xi64) <- (2xi64) + assign_49 = full_int_array_6 + + # pd_op.strided_slice: (-1x28x28x96xf32) <- (-1x56x56x96xf32, 2xi64, 2xi64, 2xi64) + strided_slice_2 = paddle._C_ops.strided_slice( + reshape_18, [1, 2], full_int_array_6, full_int_array_29, full_int_array_4 + ) + + # pd_op.strided_slice: (-1x28x28x96xf32) <- (-1x56x56x96xf32, 2xi64, 2xi64, 2xi64) + strided_slice_3 = paddle._C_ops.strided_slice( + reshape_18, [1, 2], full_int_array_18, full_int_array_29, full_int_array_4 + ) + + # pd_op.shape64: (4xi64) <- (-1x56x56x96xf32) + shape64_12 = paddle._C_ops.shape64(reshape_18) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_28 = paddle._C_ops.slice( + shape64_12, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_12 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_15 = [slice_28, full_13, full_13, full_14] + del full_13, full_14, slice_28 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_15 = paddle._C_ops.stack(combine_15, 0) + del combine_15 + + # pd_op.reshape: (-1x56x56x96xf32) <- (-1x56x56x96xf32, 4xi64) + reshape_121 = paddle._C_ops.reshape(reshape_18, stack_15) + del stack_15 + + # pd_op.full: (1xi32) <- () + full_2 = paddle._C_ops.full( + [1], float("-1"), paddle.int32, paddle.core.CPUPlace() + ) + + # pd_op.assign: (1xi32) <- (1xi32) + assign_128 = full_2 + + # pd_op.assign: (1xi32) <- (1xi32) + assign_55 = full_2 + + # builtin.combine: ([-1x28x28x96xf32, -1x28x28x96xf32, -1x28x28x96xf32, -1x28x28x96xf32]) <- (-1x28x28x96xf32, -1x28x28x96xf32, -1x28x28x96xf32, -1x28x28x96xf32) + combine_16 = [ + strided_slice_0, + strided_slice_1, + strided_slice_2, + strided_slice_3, + ] + + # pd_op.concat: (-1x28x28x384xf32) <- ([-1x28x28x96xf32, -1x28x28x96xf32, -1x28x28x96xf32, -1x28x28x96xf32], 1xi32) + concat_0 = paddle._C_ops.concat(combine_16, full_2) + del combine_16 + + # pd_op.full: (xi64) <- () + full_30 = paddle._C_ops.full( + [], float("-1"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_31 = paddle._C_ops.full( + [], float("384"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_17 = [slice_27, full_30, full_31] + del slice_27 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_16 = paddle._C_ops.stack(combine_17, 0) + del combine_17 + + # pd_op.reshape: (-1x-1x384xf32) <- (-1x28x28x384xf32, 3xi64) + reshape_19 = paddle._C_ops.reshape(concat_0, stack_16) + del stack_16 + + # pd_op.layer_norm: (-1x-1x384xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x384xf32, 384xf32, 384xf32) + layer_norm_15, layer_norm_16, layer_norm_17 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + reshape_19, parameter_132, parameter_131, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_131, parameter_132 + + # pd_op.matmul: (-1x-1x192xf32) <- (-1x-1x384xf32, 384x192xf32) + matmul_10 = paddle._C_ops.matmul(layer_norm_15, parameter_130, False, False) + del parameter_130 + + # pd_op.shape64: (3xi64) <- (-1x-1x192xf32) + shape64_13 = paddle._C_ops.shape64(matmul_10) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_29 = paddle._C_ops.slice( + shape64_13, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_13 + + # pd_op.shape64: (3xi64) <- (-1x-1x192xf32) + shape64_14 = paddle._C_ops.shape64(matmul_10) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_30 = paddle._C_ops.slice( + shape64_14, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + del shape64_14 + + # pd_op.layer_norm: (-1x-1x192xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x192xf32, 192xf32, 192xf32) + layer_norm_18, layer_norm_19, layer_norm_20 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + matmul_10, parameter_129, parameter_128, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_128, parameter_129 + + # pd_op.full: (xi64) <- () + full_32 = paddle._C_ops.full( + [], float("28"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_33 = paddle._C_ops.full( + [], float("192"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_18 = [slice_29, full_32, full_32, full_33] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_17 = paddle._C_ops.stack(combine_18, 0) + del combine_18 + + # pd_op.reshape: (-1x28x28x192xf32) <- (-1x-1x192xf32, 4xi64) + reshape_20 = paddle._C_ops.reshape(layer_norm_18, stack_17) + del stack_17 + + # pd_op.shape64: (4xi64) <- (-1x28x28x192xf32) + shape64_15 = paddle._C_ops.shape64(reshape_20) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_31 = paddle._C_ops.slice( + shape64_15, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_15 + + # pd_op.full: (xi64) <- () + full_34 = paddle._C_ops.full( + [], float("4"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_19 = [slice_31, full_34, full_16, full_34, full_16, full_33] + del slice_31 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_18 = paddle._C_ops.stack(combine_19, 0) + del combine_19 + + # pd_op.reshape: (-1x4x7x4x7x192xf32) <- (-1x28x28x192xf32, 6xi64) + reshape_122 = paddle._C_ops.reshape(reshape_20, stack_18) + del stack_18 + + # pd_op.transpose: (-1x4x4x7x7x192xf32) <- (-1x4x7x4x7x192xf32) + transpose_13 = paddle._C_ops.transpose(reshape_122, [0, 1, 3, 2, 4, 5]) + del reshape_122 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_33 = [-1, 7, 7, 192] + + # pd_op.reshape: (-1x7x7x192xf32) <- (-1x4x4x7x7x192xf32, 4xi64) + reshape_21 = paddle._C_ops.reshape(transpose_13, full_int_array_33) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_34 = [-1, 49, 192] + + # pd_op.reshape: (-1x49x192xf32) <- (-1x7x7x192xf32, 3xi64) + reshape_22 = paddle._C_ops.reshape(reshape_21, full_int_array_34) + + # pd_op.shape64: (3xi64) <- (-1x49x192xf32) + shape64_16 = paddle._C_ops.shape64(reshape_22) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_32 = paddle._C_ops.slice( + shape64_16, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_16 + + # pd_op.matmul: (-1x49x576xf32) <- (-1x49x192xf32, 192x576xf32) + matmul_11 = paddle._C_ops.matmul(reshape_22, parameter_127, False, False) + del parameter_127 + + # pd_op.add: (-1x49x576xf32) <- (-1x49x576xf32, 576xf32) + add_15 = paddle._C_ops.add(matmul_11, parameter_126) + del parameter_126 + + # pd_op.full: (xi64) <- () + full_35 = paddle._C_ops.full( + [], float("6"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_20 = [slice_32, full_17, full_18, full_35, full_19] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_19 = paddle._C_ops.stack(combine_20, 0) + del combine_20 + + # pd_op.reshape: (-1x49x3x6x32xf32) <- (-1x49x576xf32, 5xi64) + reshape_123 = paddle._C_ops.reshape(add_15, stack_19) + del stack_19 + + # pd_op.transpose: (3x-1x6x49x32xf32) <- (-1x49x3x6x32xf32) + transpose_14 = paddle._C_ops.transpose(reshape_123, [2, 0, 3, 1, 4]) + del reshape_123 + + # pd_op.slice: (-1x6x49x32xf32) <- (3x-1x6x49x32xf32, 1xi64, 1xi64) + slice_33 = paddle._C_ops.slice( + transpose_14, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x6x49x32xf32) <- (3x-1x6x49x32xf32, 1xi64, 1xi64) + slice_34 = paddle._C_ops.slice( + transpose_14, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x6x49x32xf32) <- (3x-1x6x49x32xf32, 1xi64, 1xi64) + slice_2 = paddle._C_ops.slice( + transpose_14, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x6x49x32xf32) <- (-1x6x49x32xf32, 1xf32) + scale_2 = paddle._C_ops.scale(slice_33, full_0, float("0"), True) + del slice_33 + + # pd_op.transpose: (-1x6x32x49xf32) <- (-1x6x49x32xf32) + transpose_15 = paddle._C_ops.transpose(slice_34, [0, 1, 3, 2]) + del slice_34 + + # pd_op.matmul: (-1x6x49x49xf32) <- (-1x6x49x32xf32, -1x6x32x49xf32) + matmul_12 = paddle._C_ops.matmul(scale_2, transpose_15, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_23 = paddle._C_ops.reshape(data_0, full_int_array_12) + del data_0 + + # pd_op.index_select: (2401x6xf32) <- (169x6xf32, 2401xi64) + index_select_2 = paddle._C_ops.index_select(data_1, reshape_23, 0) + del data_1 + + # pd_op.reshape: (49x49x6xf32) <- (2401x6xf32, 3xi64) + reshape_124 = paddle._C_ops.reshape(index_select_2, full_int_array_13) + + # pd_op.transpose: (6x49x49xf32) <- (49x49x6xf32) + transpose_16 = paddle._C_ops.transpose(reshape_124, [2, 0, 1]) + del reshape_124 + + # pd_op.unsqueeze: (1x6x49x49xf32) <- (6x49x49xf32, 1xi64) + unsqueeze_3 = paddle._C_ops.unsqueeze(transpose_16, full_int_array_7) + + # pd_op.add: (-1x6x49x49xf32) <- (-1x6x49x49xf32, 1x6x49x49xf32) + add_89 = paddle._C_ops.add(matmul_12, unsqueeze_3) + + # pd_op.softmax: (-1x6x49x49xf32) <- (-1x6x49x49xf32) + softmax_2 = paddle._C_ops.softmax(add_89, -1) + del add_89 + + # pd_op.matmul: (-1x6x49x32xf32) <- (-1x6x49x49xf32, -1x6x49x32xf32) + matmul_66 = paddle._C_ops.matmul(softmax_2, slice_2, False, False) + + # pd_op.transpose: (-1x49x6x32xf32) <- (-1x6x49x32xf32) + transpose_17 = paddle._C_ops.transpose(matmul_66, [0, 2, 1, 3]) + del matmul_66 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_21 = [slice_32, full_17, full_33] + del slice_32 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_20 = paddle._C_ops.stack(combine_21, 0) + del combine_21 + + # pd_op.reshape: (-1x49x192xf32) <- (-1x49x6x32xf32, 3xi64) + reshape_24 = paddle._C_ops.reshape(transpose_17, stack_20) + del stack_20 + + # pd_op.matmul: (-1x49x192xf32) <- (-1x49x192xf32, 192x192xf32) + matmul_13 = paddle._C_ops.matmul(reshape_24, parameter_125, False, False) + del parameter_125 + + # pd_op.add: (-1x49x192xf32) <- (-1x49x192xf32, 192xf32) + add_16 = paddle._C_ops.add(matmul_13, parameter_124) + del parameter_124 + + # pd_op.reshape: (-1x7x7x192xf32) <- (-1x49x192xf32, 4xi64) + reshape_25 = paddle._C_ops.reshape(add_16, full_int_array_33) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_35 = [-1, 4, 4, 7, 7, 192] + + # pd_op.reshape: (-1x4x4x7x7x192xf32) <- (-1x7x7x192xf32, 6xi64) + reshape_125 = paddle._C_ops.reshape(reshape_25, full_int_array_35) + + # pd_op.transpose: (-1x4x7x4x7x192xf32) <- (-1x4x4x7x7x192xf32) + transpose_18 = paddle._C_ops.transpose(reshape_125, [0, 1, 3, 2, 4, 5]) + del reshape_125 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_36 = [-1, 28, 28, 192] + + # pd_op.reshape: (-1x28x28x192xf32) <- (-1x4x7x4x7x192xf32, 4xi64) + reshape_26 = paddle._C_ops.reshape(transpose_18, full_int_array_36) + + # pd_op.full: (xi64) <- () + full_36 = paddle._C_ops.full( + [], float("784"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_22 = [slice_29, full_36, full_33] + del slice_29 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_21 = paddle._C_ops.stack(combine_22, 0) + del combine_22 + + # pd_op.reshape: (-1x784x192xf32) <- (-1x28x28x192xf32, 3xi64) + reshape_27 = paddle._C_ops.reshape(reshape_26, stack_21) + del stack_21 + + # pd_op.full: (xf32) <- () + full_3 = paddle._C_ops.full( + [], + float("0.963636"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_31 = full_3 + + # pd_op.shape64: (3xi64) <- (-1x784x192xf32) + shape64_17 = paddle._C_ops.shape64(reshape_27) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_35 = paddle._C_ops.slice( + shape64_17, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_17 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_23 = [slice_35, full_27, full_27] + del slice_35 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_22 = paddle._C_ops.stack(combine_23, 0) + del combine_23 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_2 = paddle._C_ops.uniform( + stack_22, + paddle.float32, + full_28, + full_29, + 0, + paddle.framework._current_expected_place(), + ) + del stack_22 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_90 = paddle._C_ops.add(full_3, uniform_2) + del uniform_2 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_2 = paddle._C_ops.floor(add_90) + del add_90 + + # pd_op.divide: (-1x784x192xf32) <- (-1x784x192xf32, xf32) + divide_2 = paddle._C_ops.divide(reshape_27, full_3) + + # pd_op.multiply: (-1x784x192xf32) <- (-1x784x192xf32, -1x1x1xf32) + multiply_2 = paddle._C_ops.multiply(divide_2, floor_2) + + # pd_op.add: (-1x784x192xf32) <- (-1x-1x192xf32, -1x784x192xf32) + add_17 = paddle._C_ops.add(matmul_10, multiply_2) + + # pd_op.layer_norm: (-1x784x192xf32, -1x784xf32, -1x784xf32) <- (-1x784x192xf32, 192xf32, 192xf32) + layer_norm_21, layer_norm_22, layer_norm_23 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_17, parameter_123, parameter_122, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_122, parameter_123 + + # pd_op.matmul: (-1x784x768xf32) <- (-1x784x192xf32, 192x768xf32) + matmul_14 = paddle._C_ops.matmul(layer_norm_21, parameter_121, False, False) + del parameter_121 + + # pd_op.add: (-1x784x768xf32) <- (-1x784x768xf32, 768xf32) + add_18 = paddle._C_ops.add(matmul_14, parameter_120) + del parameter_120 + + # pd_op.gelu: (-1x784x768xf32) <- (-1x784x768xf32) + gelu_2 = paddle._C_ops.gelu(add_18, False) + + # pd_op.matmul: (-1x784x192xf32) <- (-1x784x768xf32, 768x192xf32) + matmul_15 = paddle._C_ops.matmul(gelu_2, parameter_119, False, False) + del parameter_119 + + # pd_op.add: (-1x784x192xf32) <- (-1x784x192xf32, 192xf32) + add_19 = paddle._C_ops.add(matmul_15, parameter_118) + del parameter_118 + + # pd_op.shape64: (3xi64) <- (-1x784x192xf32) + shape64_18 = paddle._C_ops.shape64(add_19) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_36 = paddle._C_ops.slice( + shape64_18, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_18 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_24 = [slice_36, full_27, full_27] + del slice_36 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_23 = paddle._C_ops.stack(combine_24, 0) + del combine_24 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_3 = paddle._C_ops.uniform( + stack_23, + paddle.float32, + full_28, + full_29, + 0, + paddle.framework._current_expected_place(), + ) + del stack_23 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_91 = paddle._C_ops.add(full_3, uniform_3) + del uniform_3 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_3 = paddle._C_ops.floor(add_91) + del add_91 + + # pd_op.divide: (-1x784x192xf32) <- (-1x784x192xf32, xf32) + divide_3 = paddle._C_ops.divide(add_19, full_3) + + # pd_op.multiply: (-1x784x192xf32) <- (-1x784x192xf32, -1x1x1xf32) + multiply_3 = paddle._C_ops.multiply(divide_3, floor_3) + + # pd_op.add: (-1x784x192xf32) <- (-1x784x192xf32, -1x784x192xf32) + add_20 = paddle._C_ops.add(add_17, multiply_3) + + # pd_op.shape64: (3xi64) <- (-1x784x192xf32) + shape64_19 = paddle._C_ops.shape64(add_20) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_37 = paddle._C_ops.slice( + shape64_19, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_19 + + # pd_op.layer_norm: (-1x784x192xf32, -1x784xf32, -1x784xf32) <- (-1x784x192xf32, 192xf32, 192xf32) + layer_norm_24, layer_norm_25, layer_norm_26 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_20, parameter_117, parameter_116, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_116, parameter_117 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_25 = [slice_37, full_32, full_32, full_33] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_24 = paddle._C_ops.stack(combine_25, 0) + del combine_25 + + # pd_op.reshape: (-1x28x28x192xf32) <- (-1x784x192xf32, 4xi64) + reshape_28 = paddle._C_ops.reshape(layer_norm_24, stack_24) + del stack_24 + + # pd_op.shape64: (4xi64) <- (-1x28x28x192xf32) + shape64_20 = paddle._C_ops.shape64(reshape_28) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_38 = paddle._C_ops.slice( + shape64_20, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_20 + + # pd_op.roll: (-1x28x28x192xf32) <- (-1x28x28x192xf32, 2xi64) + roll_2 = paddle._C_ops.roll(reshape_28, full_int_array_2, [1, 2]) + + # pd_op.shape64: (4xi64) <- (-1x28x28x192xf32) + shape64_21 = paddle._C_ops.shape64(roll_2) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_39 = paddle._C_ops.slice( + shape64_21, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_21 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_26 = [slice_39, full_34, full_16, full_34, full_16, full_33] + del slice_39 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_25 = paddle._C_ops.stack(combine_26, 0) + del combine_26 + + # pd_op.reshape: (-1x4x7x4x7x192xf32) <- (-1x28x28x192xf32, 6xi64) + reshape_126 = paddle._C_ops.reshape(roll_2, stack_25) + del stack_25 + + # pd_op.transpose: (-1x4x4x7x7x192xf32) <- (-1x4x7x4x7x192xf32) + transpose_19 = paddle._C_ops.transpose(reshape_126, [0, 1, 3, 2, 4, 5]) + del reshape_126 + + # pd_op.reshape: (-1x7x7x192xf32) <- (-1x4x4x7x7x192xf32, 4xi64) + reshape_29 = paddle._C_ops.reshape(transpose_19, full_int_array_33) + + # pd_op.reshape: (-1x49x192xf32) <- (-1x7x7x192xf32, 3xi64) + reshape_30 = paddle._C_ops.reshape(reshape_29, full_int_array_34) + del full_int_array_34 + + # pd_op.full: (1x28x28x1xf32) <- () + full_37 = paddle._C_ops.full( + [1, 28, 28, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__14 = paddle._C_ops.set_value_( + full_37, + full_int_array_16, + full_int_array_17, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_37 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__15 = paddle._C_ops.set_value_( + set_value__14, + full_int_array_19, + full_int_array_20, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__14 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__16 = paddle._C_ops.set_value_( + set_value__15, + full_int_array_21, + full_int_array_22, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__15 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__17 = paddle._C_ops.set_value_( + set_value__16, + full_int_array_23, + full_int_array_24, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__16 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__18 = paddle._C_ops.set_value_( + set_value__17, + full_int_array_17, + full_int_array_2, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__17 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__19 = paddle._C_ops.set_value_( + set_value__18, + full_int_array_20, + full_int_array_25, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__18 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__20 = paddle._C_ops.set_value_( + set_value__19, + full_int_array_26, + full_int_array_27, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__19 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__21 = paddle._C_ops.set_value_( + set_value__20, + full_int_array_24, + full_int_array_28, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__20 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__1 = paddle._C_ops.set_value_( + set_value__21, + full_int_array_2, + full_int_array_29, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__21 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_37 = [1, 4, 7, 4, 7, 1] + + # pd_op.reshape: (1x4x7x4x7x1xf32) <- (1x28x28x1xf32, 6xi64) + reshape_127 = paddle._C_ops.reshape(set_value__1, full_int_array_37) + del full_int_array_37 + + # pd_op.transpose: (1x4x4x7x7x1xf32) <- (1x4x7x4x7x1xf32) + transpose_75 = paddle._C_ops.transpose(reshape_127, [0, 1, 3, 2, 4, 5]) + del reshape_127 + + # pd_op.reshape: (16x7x7x1xf32) <- (1x4x4x7x7x1xf32, 4xi64) + reshape_128 = paddle._C_ops.reshape(transpose_75, full_int_array_31) + del transpose_75 + + # pd_op.reshape: (16x49xf32) <- (16x7x7x1xf32, 2xi64) + reshape_129 = paddle._C_ops.reshape(reshape_128, full_int_array_32) + del reshape_128 + + # pd_op.unsqueeze: (16x1x49xf32) <- (16x49xf32, 1xi64) + unsqueeze_22 = paddle._C_ops.unsqueeze(reshape_129, full_int_array_8) + + # pd_op.unsqueeze: (16x49x1xf32) <- (16x49xf32, 1xi64) + unsqueeze_23 = paddle._C_ops.unsqueeze(reshape_129, full_int_array_0) + del reshape_129 + + # pd_op.subtract: (16x49x49xf32) <- (16x1x49xf32, 16x49x1xf32) + subtract_1 = paddle._C_ops.subtract(unsqueeze_22, unsqueeze_23) + del unsqueeze_22, unsqueeze_23 + + # pd_op.not_equal: (16x49x49xb) <- (16x49x49xf32, xf32) + not_equal_1 = paddle._C_ops.not_equal(subtract_1, full_22) + + # pd_op.full: (16x49x49xf32) <- () + full_38 = paddle._C_ops.full( + [16, 49, 49], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (16x49x49xf32) <- (16x49x49xb, 16x49x49xf32, 16x49x49xf32) + where_2 = paddle._C_ops.where(not_equal_1, full_38, subtract_1) + del full_38, not_equal_1, subtract_1 + + # pd_op.equal: (16x49x49xb) <- (16x49x49xf32, xf32) + equal_1 = paddle._C_ops.equal(where_2, full_22) + + # pd_op.full: (16x49x49xf32) <- () + full_39 = paddle._C_ops.full( + [16, 49, 49], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (16x49x49xf32) <- (16x49x49xb, 16x49x49xf32, 16x49x49xf32) + where_3 = paddle._C_ops.where(equal_1, full_39, where_2) + del equal_1, full_39, where_2 + + # pd_op.shape64: (3xi64) <- (-1x49x192xf32) + shape64_22 = paddle._C_ops.shape64(reshape_30) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_40 = paddle._C_ops.slice( + shape64_22, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_22 + + # pd_op.matmul: (-1x49x576xf32) <- (-1x49x192xf32, 192x576xf32) + matmul_16 = paddle._C_ops.matmul(reshape_30, parameter_115, False, False) + del parameter_115 + + # pd_op.add: (-1x49x576xf32) <- (-1x49x576xf32, 576xf32) + add_21 = paddle._C_ops.add(matmul_16, parameter_114) + del parameter_114 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_27 = [slice_40, full_17, full_18, full_35, full_19] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_26 = paddle._C_ops.stack(combine_27, 0) + del combine_27 + + # pd_op.reshape: (-1x49x3x6x32xf32) <- (-1x49x576xf32, 5xi64) + reshape_130 = paddle._C_ops.reshape(add_21, stack_26) + del stack_26 + + # pd_op.transpose: (3x-1x6x49x32xf32) <- (-1x49x3x6x32xf32) + transpose_20 = paddle._C_ops.transpose(reshape_130, [2, 0, 3, 1, 4]) + del reshape_130 + + # pd_op.slice: (-1x6x49x32xf32) <- (3x-1x6x49x32xf32, 1xi64, 1xi64) + slice_41 = paddle._C_ops.slice( + transpose_20, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x6x49x32xf32) <- (3x-1x6x49x32xf32, 1xi64, 1xi64) + slice_42 = paddle._C_ops.slice( + transpose_20, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x6x49x32xf32) <- (3x-1x6x49x32xf32, 1xi64, 1xi64) + slice_3 = paddle._C_ops.slice( + transpose_20, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x6x49x32xf32) <- (-1x6x49x32xf32, 1xf32) + scale_3 = paddle._C_ops.scale(slice_41, full_0, float("0"), True) + del slice_41 + + # pd_op.transpose: (-1x6x32x49xf32) <- (-1x6x49x32xf32) + transpose_21 = paddle._C_ops.transpose(slice_42, [0, 1, 3, 2]) + del slice_42 + + # pd_op.matmul: (-1x6x49x49xf32) <- (-1x6x49x32xf32, -1x6x32x49xf32) + matmul_17 = paddle._C_ops.matmul(scale_3, transpose_21, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_31 = paddle._C_ops.reshape(data_2, full_int_array_12) + del data_2 + + # pd_op.index_select: (2401x6xf32) <- (169x6xf32, 2401xi64) + index_select_3 = paddle._C_ops.index_select(data_3, reshape_31, 0) + del data_3 + + # pd_op.reshape: (49x49x6xf32) <- (2401x6xf32, 3xi64) + reshape_131 = paddle._C_ops.reshape(index_select_3, full_int_array_13) + + # pd_op.transpose: (6x49x49xf32) <- (49x49x6xf32) + transpose_22 = paddle._C_ops.transpose(reshape_131, [2, 0, 1]) + del reshape_131 + + # pd_op.unsqueeze: (1x6x49x49xf32) <- (6x49x49xf32, 1xi64) + unsqueeze_4 = paddle._C_ops.unsqueeze(transpose_22, full_int_array_7) + + # pd_op.add: (-1x6x49x49xf32) <- (-1x6x49x49xf32, 1x6x49x49xf32) + add_22 = paddle._C_ops.add(matmul_17, unsqueeze_4) + + # pd_op.full: (xi64) <- () + full_40 = paddle._C_ops.full( + [], float("16"), paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_1 = paddle._C_ops.floor_divide(slice_40, full_40) + del full_40 + + # pd_op.full: (xi64) <- () + full_41 = paddle._C_ops.full( + [], float("16"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_28 = [floor_divide_1, full_41, full_35, full_17, full_17] + del floor_divide_1, full_41 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_27 = paddle._C_ops.stack(combine_28, 0) + del combine_28 + + # pd_op.reshape: (-1x16x6x49x49xf32) <- (-1x6x49x49xf32, 5xi64) + reshape_32 = paddle._C_ops.reshape(add_22, stack_27) + del stack_27 + + # pd_op.unsqueeze: (16x1x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_24 = paddle._C_ops.unsqueeze(where_3, full_int_array_8) + del where_3 + + # pd_op.unsqueeze: (1x16x1x49x49xf32) <- (16x1x49x49xf32, 1xi64) + unsqueeze_5 = paddle._C_ops.unsqueeze(unsqueeze_24, full_int_array_7) + del unsqueeze_24 + + # pd_op.add: (-1x16x6x49x49xf32) <- (-1x16x6x49x49xf32, 1x16x1x49x49xf32) + add_23 = paddle._C_ops.add(reshape_32, unsqueeze_5) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_29 = [slice_40, full_35, full_17, full_17] + del full_35 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_28 = paddle._C_ops.stack(combine_29, 0) + del combine_29 + + # pd_op.reshape: (-1x6x49x49xf32) <- (-1x16x6x49x49xf32, 4xi64) + reshape_132 = paddle._C_ops.reshape(add_23, stack_28) + del stack_28 + + # pd_op.softmax: (-1x6x49x49xf32) <- (-1x6x49x49xf32) + softmax_3 = paddle._C_ops.softmax(reshape_132, -1) + del reshape_132 + + # pd_op.matmul: (-1x6x49x32xf32) <- (-1x6x49x49xf32, -1x6x49x32xf32) + matmul_67 = paddle._C_ops.matmul(softmax_3, slice_3, False, False) + + # pd_op.transpose: (-1x49x6x32xf32) <- (-1x6x49x32xf32) + transpose_23 = paddle._C_ops.transpose(matmul_67, [0, 2, 1, 3]) + del matmul_67 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_30 = [slice_40, full_17, full_33] + del slice_40 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_29 = paddle._C_ops.stack(combine_30, 0) + del combine_30 + + # pd_op.reshape: (-1x49x192xf32) <- (-1x49x6x32xf32, 3xi64) + reshape_33 = paddle._C_ops.reshape(transpose_23, stack_29) + del stack_29 + + # pd_op.matmul: (-1x49x192xf32) <- (-1x49x192xf32, 192x192xf32) + matmul_18 = paddle._C_ops.matmul(reshape_33, parameter_113, False, False) + del parameter_113 + + # pd_op.add: (-1x49x192xf32) <- (-1x49x192xf32, 192xf32) + add_24 = paddle._C_ops.add(matmul_18, parameter_112) + del parameter_112 + + # pd_op.reshape: (-1x7x7x192xf32) <- (-1x49x192xf32, 4xi64) + reshape_34 = paddle._C_ops.reshape(add_24, full_int_array_33) + del full_int_array_33 + + # pd_op.reshape: (-1x4x4x7x7x192xf32) <- (-1x7x7x192xf32, 6xi64) + reshape_133 = paddle._C_ops.reshape(reshape_34, full_int_array_35) + del full_int_array_35 + + # pd_op.transpose: (-1x4x7x4x7x192xf32) <- (-1x4x4x7x7x192xf32) + transpose_24 = paddle._C_ops.transpose(reshape_133, [0, 1, 3, 2, 4, 5]) + del reshape_133 + + # pd_op.reshape: (-1x28x28x192xf32) <- (-1x4x7x4x7x192xf32, 4xi64) + reshape_35 = paddle._C_ops.reshape(transpose_24, full_int_array_36) + del full_int_array_36 + + # pd_op.roll: (-1x28x28x192xf32) <- (-1x28x28x192xf32, 2xi64) + roll_3 = paddle._C_ops.roll(reshape_35, full_int_array_3, [1, 2]) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_31 = [slice_37, full_36, full_33] + del full_36, slice_37 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_30 = paddle._C_ops.stack(combine_31, 0) + del combine_31 + + # pd_op.reshape: (-1x784x192xf32) <- (-1x28x28x192xf32, 3xi64) + reshape_36 = paddle._C_ops.reshape(roll_3, stack_30) + del stack_30 + + # pd_op.full: (xf32) <- () + full_4 = paddle._C_ops.full( + [], + float("0.945455"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_42 = full_4 + + # pd_op.shape64: (3xi64) <- (-1x784x192xf32) + shape64_23 = paddle._C_ops.shape64(reshape_36) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_43 = paddle._C_ops.slice( + shape64_23, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_23 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_32 = [slice_43, full_27, full_27] + del slice_43 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_31 = paddle._C_ops.stack(combine_32, 0) + del combine_32 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_4 = paddle._C_ops.uniform( + stack_31, + paddle.float32, + full_28, + full_29, + 0, + paddle.framework._current_expected_place(), + ) + del stack_31 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_92 = paddle._C_ops.add(full_4, uniform_4) + del uniform_4 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_4 = paddle._C_ops.floor(add_92) + del add_92 + + # pd_op.divide: (-1x784x192xf32) <- (-1x784x192xf32, xf32) + divide_4 = paddle._C_ops.divide(reshape_36, full_4) + + # pd_op.multiply: (-1x784x192xf32) <- (-1x784x192xf32, -1x1x1xf32) + multiply_4 = paddle._C_ops.multiply(divide_4, floor_4) + + # pd_op.add: (-1x784x192xf32) <- (-1x784x192xf32, -1x784x192xf32) + add_25 = paddle._C_ops.add(add_20, multiply_4) + + # pd_op.layer_norm: (-1x784x192xf32, -1x784xf32, -1x784xf32) <- (-1x784x192xf32, 192xf32, 192xf32) + layer_norm_27, layer_norm_28, layer_norm_29 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_25, parameter_111, parameter_110, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_110, parameter_111 + + # pd_op.matmul: (-1x784x768xf32) <- (-1x784x192xf32, 192x768xf32) + matmul_19 = paddle._C_ops.matmul(layer_norm_27, parameter_109, False, False) + del parameter_109 + + # pd_op.add: (-1x784x768xf32) <- (-1x784x768xf32, 768xf32) + add_26 = paddle._C_ops.add(matmul_19, parameter_108) + del parameter_108 + + # pd_op.gelu: (-1x784x768xf32) <- (-1x784x768xf32) + gelu_3 = paddle._C_ops.gelu(add_26, False) + + # pd_op.matmul: (-1x784x192xf32) <- (-1x784x768xf32, 768x192xf32) + matmul_20 = paddle._C_ops.matmul(gelu_3, parameter_107, False, False) + del parameter_107 + + # pd_op.add: (-1x784x192xf32) <- (-1x784x192xf32, 192xf32) + add_27 = paddle._C_ops.add(matmul_20, parameter_106) + del parameter_106 + + # pd_op.shape64: (3xi64) <- (-1x784x192xf32) + shape64_24 = paddle._C_ops.shape64(add_27) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_44 = paddle._C_ops.slice( + shape64_24, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_24 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_33 = [slice_44, full_27, full_27] + del slice_44 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_32 = paddle._C_ops.stack(combine_33, 0) + del combine_33 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_5 = paddle._C_ops.uniform( + stack_32, + paddle.float32, + full_28, + full_29, + 0, + paddle.framework._current_expected_place(), + ) + del stack_32 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_93 = paddle._C_ops.add(full_4, uniform_5) + del uniform_5 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_5 = paddle._C_ops.floor(add_93) + del add_93 + + # pd_op.divide: (-1x784x192xf32) <- (-1x784x192xf32, xf32) + divide_5 = paddle._C_ops.divide(add_27, full_4) + + # pd_op.multiply: (-1x784x192xf32) <- (-1x784x192xf32, -1x1x1xf32) + multiply_5 = paddle._C_ops.multiply(divide_5, floor_5) + + # pd_op.add: (-1x784x192xf32) <- (-1x784x192xf32, -1x784x192xf32) + add_28 = paddle._C_ops.add(add_25, multiply_5) + + # pd_op.shape64: (3xi64) <- (-1x784x192xf32) + shape64_25 = paddle._C_ops.shape64(add_28) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_45 = paddle._C_ops.slice( + shape64_25, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_25 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_34 = [slice_45, full_32, full_32, full_33] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_33 = paddle._C_ops.stack(combine_34, 0) + del combine_34 + + # pd_op.reshape: (-1x28x28x192xf32) <- (-1x784x192xf32, 4xi64) + reshape_37 = paddle._C_ops.reshape(add_28, stack_33) + del stack_33 + + # pd_op.strided_slice: (-1x14x14x192xf32) <- (-1x28x28x192xf32, 2xi64, 2xi64, 2xi64) + strided_slice_4 = paddle._C_ops.strided_slice( + reshape_37, [1, 2], full_int_array_16, full_int_array_29, full_int_array_4 + ) + + # pd_op.strided_slice: (-1x14x14x192xf32) <- (-1x28x28x192xf32, 2xi64, 2xi64, 2xi64) + strided_slice_5 = paddle._C_ops.strided_slice( + reshape_37, [1, 2], full_int_array_5, full_int_array_29, full_int_array_4 + ) + + # pd_op.strided_slice: (-1x14x14x192xf32) <- (-1x28x28x192xf32, 2xi64, 2xi64, 2xi64) + strided_slice_6 = paddle._C_ops.strided_slice( + reshape_37, [1, 2], full_int_array_6, full_int_array_29, full_int_array_4 + ) + + # pd_op.strided_slice: (-1x14x14x192xf32) <- (-1x28x28x192xf32, 2xi64, 2xi64, 2xi64) + strided_slice_7 = paddle._C_ops.strided_slice( + reshape_37, [1, 2], full_int_array_18, full_int_array_29, full_int_array_4 + ) + + # pd_op.shape64: (4xi64) <- (-1x28x28x192xf32) + shape64_26 = paddle._C_ops.shape64(reshape_37) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_46 = paddle._C_ops.slice( + shape64_26, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_26 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_35 = [slice_46, full_32, full_32, full_33] + del full_32, full_33, slice_46 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_34 = paddle._C_ops.stack(combine_35, 0) + del combine_35 + + # pd_op.reshape: (-1x28x28x192xf32) <- (-1x28x28x192xf32, 4xi64) + reshape_134 = paddle._C_ops.reshape(reshape_37, stack_34) + del stack_34 + + # builtin.combine: ([-1x14x14x192xf32, -1x14x14x192xf32, -1x14x14x192xf32, -1x14x14x192xf32]) <- (-1x14x14x192xf32, -1x14x14x192xf32, -1x14x14x192xf32, -1x14x14x192xf32) + combine_36 = [ + strided_slice_4, + strided_slice_5, + strided_slice_6, + strided_slice_7, + ] + + # pd_op.concat: (-1x14x14x768xf32) <- ([-1x14x14x192xf32, -1x14x14x192xf32, -1x14x14x192xf32, -1x14x14x192xf32], 1xi32) + concat_1 = paddle._C_ops.concat(combine_36, full_2) + del combine_36 + + # pd_op.full: (xi64) <- () + full_42 = paddle._C_ops.full( + [], float("768"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_37 = [slice_45, full_30, full_42] + del slice_45 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_35 = paddle._C_ops.stack(combine_37, 0) + del combine_37 + + # pd_op.reshape: (-1x-1x768xf32) <- (-1x14x14x768xf32, 3xi64) + reshape_38 = paddle._C_ops.reshape(concat_1, stack_35) + del stack_35 + + # pd_op.layer_norm: (-1x-1x768xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x768xf32, 768xf32, 768xf32) + layer_norm_30, layer_norm_31, layer_norm_32 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + reshape_38, parameter_105, parameter_104, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_104, parameter_105 + + # pd_op.matmul: (-1x-1x384xf32) <- (-1x-1x768xf32, 768x384xf32) + matmul_21 = paddle._C_ops.matmul(layer_norm_30, parameter_103, False, False) + del parameter_103 + + # pd_op.shape64: (3xi64) <- (-1x-1x384xf32) + shape64_27 = paddle._C_ops.shape64(matmul_21) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_47 = paddle._C_ops.slice( + shape64_27, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_27 + + # pd_op.shape64: (3xi64) <- (-1x-1x384xf32) + shape64_28 = paddle._C_ops.shape64(matmul_21) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_48 = paddle._C_ops.slice( + shape64_28, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + del shape64_28 + + # pd_op.layer_norm: (-1x-1x384xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x384xf32, 384xf32, 384xf32) + layer_norm_33, layer_norm_34, layer_norm_35 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + matmul_21, parameter_102, parameter_101, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_101, parameter_102 + + # pd_op.full: (xi64) <- () + full_43 = paddle._C_ops.full( + [], float("14"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_38 = [slice_47, full_43, full_43, full_31] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_36 = paddle._C_ops.stack(combine_38, 0) + del combine_38 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x-1x384xf32, 4xi64) + reshape_39 = paddle._C_ops.reshape(layer_norm_33, stack_36) + del stack_36 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_29 = paddle._C_ops.shape64(reshape_39) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_49 = paddle._C_ops.slice( + shape64_29, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_29 + + # pd_op.full: (xi64) <- () + full_44 = paddle._C_ops.full( + [], float("2"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_39 = [slice_49, full_44, full_16, full_44, full_16, full_31] + del slice_49 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_37 = paddle._C_ops.stack(combine_39, 0) + del combine_39 + + # pd_op.reshape: (-1x2x7x2x7x384xf32) <- (-1x14x14x384xf32, 6xi64) + reshape_135 = paddle._C_ops.reshape(reshape_39, stack_37) + del stack_37 + + # pd_op.transpose: (-1x2x2x7x7x384xf32) <- (-1x2x7x2x7x384xf32) + transpose_25 = paddle._C_ops.transpose(reshape_135, [0, 1, 3, 2, 4, 5]) + del reshape_135 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_38 = [-1, 7, 7, 384] + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x2x2x7x7x384xf32, 4xi64) + reshape_40 = paddle._C_ops.reshape(transpose_25, full_int_array_38) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_39 = [-1, 49, 384] + + # pd_op.reshape: (-1x49x384xf32) <- (-1x7x7x384xf32, 3xi64) + reshape_41 = paddle._C_ops.reshape(reshape_40, full_int_array_39) + + # pd_op.shape64: (3xi64) <- (-1x49x384xf32) + shape64_30 = paddle._C_ops.shape64(reshape_41) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_50 = paddle._C_ops.slice( + shape64_30, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_30 + + # pd_op.matmul: (-1x49x1152xf32) <- (-1x49x384xf32, 384x1152xf32) + matmul_22 = paddle._C_ops.matmul(reshape_41, parameter_100, False, False) + del parameter_100 + + # pd_op.add: (-1x49x1152xf32) <- (-1x49x1152xf32, 1152xf32) + add_29 = paddle._C_ops.add(matmul_22, parameter_99) + del parameter_99 + + # pd_op.full: (xi64) <- () + full_45 = paddle._C_ops.full( + [], float("12"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_40 = [slice_50, full_17, full_18, full_45, full_19] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_38 = paddle._C_ops.stack(combine_40, 0) + del combine_40 + + # pd_op.reshape: (-1x49x3x12x32xf32) <- (-1x49x1152xf32, 5xi64) + reshape_136 = paddle._C_ops.reshape(add_29, stack_38) + del stack_38 + + # pd_op.transpose: (3x-1x12x49x32xf32) <- (-1x49x3x12x32xf32) + transpose_26 = paddle._C_ops.transpose(reshape_136, [2, 0, 3, 1, 4]) + del reshape_136 + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_51 = paddle._C_ops.slice( + transpose_26, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_52 = paddle._C_ops.slice( + transpose_26, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_4 = paddle._C_ops.slice( + transpose_26, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x12x49x32xf32) <- (-1x12x49x32xf32, 1xf32) + scale_4 = paddle._C_ops.scale(slice_51, full_0, float("0"), True) + del slice_51 + + # pd_op.transpose: (-1x12x32x49xf32) <- (-1x12x49x32xf32) + transpose_27 = paddle._C_ops.transpose(slice_52, [0, 1, 3, 2]) + del slice_52 + + # pd_op.matmul: (-1x12x49x49xf32) <- (-1x12x49x32xf32, -1x12x32x49xf32) + matmul_23 = paddle._C_ops.matmul(scale_4, transpose_27, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_42 = paddle._C_ops.reshape(data_4, full_int_array_12) + del data_4 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_4 = paddle._C_ops.index_select(data_5, reshape_42, 0) + del data_5 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_137 = paddle._C_ops.reshape(index_select_4, full_int_array_13) + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_28 = paddle._C_ops.transpose(reshape_137, [2, 0, 1]) + del reshape_137 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_6 = paddle._C_ops.unsqueeze(transpose_28, full_int_array_7) + + # pd_op.add: (-1x12x49x49xf32) <- (-1x12x49x49xf32, 1x12x49x49xf32) + add_94 = paddle._C_ops.add(matmul_23, unsqueeze_6) + + # pd_op.softmax: (-1x12x49x49xf32) <- (-1x12x49x49xf32) + softmax_4 = paddle._C_ops.softmax(add_94, -1) + del add_94 + + # pd_op.matmul: (-1x12x49x32xf32) <- (-1x12x49x49xf32, -1x12x49x32xf32) + matmul_68 = paddle._C_ops.matmul(softmax_4, slice_4, False, False) + + # pd_op.transpose: (-1x49x12x32xf32) <- (-1x12x49x32xf32) + transpose_29 = paddle._C_ops.transpose(matmul_68, [0, 2, 1, 3]) + del matmul_68 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_41 = [slice_50, full_17, full_31] + del slice_50 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_39 = paddle._C_ops.stack(combine_41, 0) + del combine_41 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x49x12x32xf32, 3xi64) + reshape_43 = paddle._C_ops.reshape(transpose_29, stack_39) + del stack_39 + + # pd_op.matmul: (-1x49x384xf32) <- (-1x49x384xf32, 384x384xf32) + matmul_24 = paddle._C_ops.matmul(reshape_43, parameter_98, False, False) + del parameter_98 + + # pd_op.add: (-1x49x384xf32) <- (-1x49x384xf32, 384xf32) + add_30 = paddle._C_ops.add(matmul_24, parameter_97) + del parameter_97 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x49x384xf32, 4xi64) + reshape_44 = paddle._C_ops.reshape(add_30, full_int_array_38) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_40 = [-1, 2, 2, 7, 7, 384] + + # pd_op.reshape: (-1x2x2x7x7x384xf32) <- (-1x7x7x384xf32, 6xi64) + reshape_138 = paddle._C_ops.reshape(reshape_44, full_int_array_40) + + # pd_op.transpose: (-1x2x7x2x7x384xf32) <- (-1x2x2x7x7x384xf32) + transpose_30 = paddle._C_ops.transpose(reshape_138, [0, 1, 3, 2, 4, 5]) + del reshape_138 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_41 = [-1, 14, 14, 384] + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x2x7x2x7x384xf32, 4xi64) + reshape_45 = paddle._C_ops.reshape(transpose_30, full_int_array_41) + + # pd_op.full: (xi64) <- () + full_46 = paddle._C_ops.full( + [], float("196"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_42 = [slice_47, full_46, full_31] + del slice_47 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_40 = paddle._C_ops.stack(combine_42, 0) + del combine_42 + + # pd_op.reshape: (-1x196x384xf32) <- (-1x14x14x384xf32, 3xi64) + reshape_46 = paddle._C_ops.reshape(reshape_45, stack_40) + del stack_40 + + # pd_op.full: (xf32) <- () + full_5 = paddle._C_ops.full( + [], + float("0.927273"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_64 = full_5 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_31 = paddle._C_ops.shape64(reshape_46) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_53 = paddle._C_ops.slice( + shape64_31, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_31 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_43 = [slice_53, full_27, full_27] + del slice_53 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_41 = paddle._C_ops.stack(combine_43, 0) + del combine_43 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_6 = paddle._C_ops.uniform( + stack_41, + paddle.float32, + full_28, + full_29, + 0, + paddle.framework._current_expected_place(), + ) + del stack_41 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_95 = paddle._C_ops.add(full_5, uniform_6) + del uniform_6 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_6 = paddle._C_ops.floor(add_95) + del add_95 + + # pd_op.divide: (-1x196x384xf32) <- (-1x196x384xf32, xf32) + divide_6 = paddle._C_ops.divide(reshape_46, full_5) + + # pd_op.multiply: (-1x196x384xf32) <- (-1x196x384xf32, -1x1x1xf32) + multiply_6 = paddle._C_ops.multiply(divide_6, floor_6) + + # pd_op.add: (-1x196x384xf32) <- (-1x-1x384xf32, -1x196x384xf32) + add_31 = paddle._C_ops.add(matmul_21, multiply_6) + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_36, layer_norm_37, layer_norm_38 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_31, parameter_96, parameter_95, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_95, parameter_96 + + # pd_op.matmul: (-1x196x1536xf32) <- (-1x196x384xf32, 384x1536xf32) + matmul_25 = paddle._C_ops.matmul(layer_norm_36, parameter_94, False, False) + del parameter_94 + + # pd_op.add: (-1x196x1536xf32) <- (-1x196x1536xf32, 1536xf32) + add_32 = paddle._C_ops.add(matmul_25, parameter_93) + del parameter_93 + + # pd_op.gelu: (-1x196x1536xf32) <- (-1x196x1536xf32) + gelu_4 = paddle._C_ops.gelu(add_32, False) + + # pd_op.matmul: (-1x196x384xf32) <- (-1x196x1536xf32, 1536x384xf32) + matmul_26 = paddle._C_ops.matmul(gelu_4, parameter_92, False, False) + del parameter_92 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, 384xf32) + add_33 = paddle._C_ops.add(matmul_26, parameter_91) + del parameter_91 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_32 = paddle._C_ops.shape64(add_33) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_54 = paddle._C_ops.slice( + shape64_32, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_32 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_44 = [slice_54, full_27, full_27] + del slice_54 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_42 = paddle._C_ops.stack(combine_44, 0) + del combine_44 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_7 = paddle._C_ops.uniform( + stack_42, + paddle.float32, + full_28, + full_29, + 0, + paddle.framework._current_expected_place(), + ) + del stack_42 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_96 = paddle._C_ops.add(full_5, uniform_7) + del uniform_7 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_7 = paddle._C_ops.floor(add_96) + del add_96 + + # pd_op.divide: (-1x196x384xf32) <- (-1x196x384xf32, xf32) + divide_7 = paddle._C_ops.divide(add_33, full_5) + + # pd_op.multiply: (-1x196x384xf32) <- (-1x196x384xf32, -1x1x1xf32) + multiply_7 = paddle._C_ops.multiply(divide_7, floor_7) + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_34 = paddle._C_ops.add(add_31, multiply_7) + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_33 = paddle._C_ops.shape64(add_34) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_55 = paddle._C_ops.slice( + shape64_33, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_33 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_39, layer_norm_40, layer_norm_41 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_34, parameter_90, parameter_89, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_89, parameter_90 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_45 = [slice_55, full_43, full_43, full_31] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_43 = paddle._C_ops.stack(combine_45, 0) + del combine_45 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x196x384xf32, 4xi64) + reshape_47 = paddle._C_ops.reshape(layer_norm_39, stack_43) + del stack_43 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_34 = paddle._C_ops.shape64(reshape_47) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_56 = paddle._C_ops.slice( + shape64_34, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_34 + + # pd_op.roll: (-1x14x14x384xf32) <- (-1x14x14x384xf32, 2xi64) + roll_4 = paddle._C_ops.roll(reshape_47, full_int_array_2, [1, 2]) + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_35 = paddle._C_ops.shape64(roll_4) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_57 = paddle._C_ops.slice( + shape64_35, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_35 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_46 = [slice_57, full_44, full_16, full_44, full_16, full_31] + del slice_57 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_44 = paddle._C_ops.stack(combine_46, 0) + del combine_46 + + # pd_op.reshape: (-1x2x7x2x7x384xf32) <- (-1x14x14x384xf32, 6xi64) + reshape_139 = paddle._C_ops.reshape(roll_4, stack_44) + del stack_44 + + # pd_op.transpose: (-1x2x2x7x7x384xf32) <- (-1x2x7x2x7x384xf32) + transpose_31 = paddle._C_ops.transpose(reshape_139, [0, 1, 3, 2, 4, 5]) + del reshape_139 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x2x2x7x7x384xf32, 4xi64) + reshape_48 = paddle._C_ops.reshape(transpose_31, full_int_array_38) + + # pd_op.reshape: (-1x49x384xf32) <- (-1x7x7x384xf32, 3xi64) + reshape_49 = paddle._C_ops.reshape(reshape_48, full_int_array_39) + + # pd_op.full: (1x14x14x1xf32) <- () + full_47 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__22 = paddle._C_ops.set_value_( + full_47, + full_int_array_16, + full_int_array_17, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_47 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__23 = paddle._C_ops.set_value_( + set_value__22, + full_int_array_19, + full_int_array_20, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__22 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__24 = paddle._C_ops.set_value_( + set_value__23, + full_int_array_21, + full_int_array_22, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__23 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__25 = paddle._C_ops.set_value_( + set_value__24, + full_int_array_23, + full_int_array_24, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__24 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__26 = paddle._C_ops.set_value_( + set_value__25, + full_int_array_17, + full_int_array_2, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__25 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__27 = paddle._C_ops.set_value_( + set_value__26, + full_int_array_20, + full_int_array_25, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__26 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__28 = paddle._C_ops.set_value_( + set_value__27, + full_int_array_26, + full_int_array_27, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__27 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__29 = paddle._C_ops.set_value_( + set_value__28, + full_int_array_24, + full_int_array_28, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__28 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__2 = paddle._C_ops.set_value_( + set_value__29, + full_int_array_2, + full_int_array_29, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__29 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_42 = [1, 2, 7, 2, 7, 1] + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_140 = paddle._C_ops.reshape(set_value__2, full_int_array_42) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_76 = paddle._C_ops.transpose(reshape_140, [0, 1, 3, 2, 4, 5]) + del reshape_140 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_141 = paddle._C_ops.reshape(transpose_76, full_int_array_31) + del transpose_76 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_142 = paddle._C_ops.reshape(reshape_141, full_int_array_32) + del reshape_141 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_25 = paddle._C_ops.unsqueeze(reshape_142, full_int_array_8) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_26 = paddle._C_ops.unsqueeze(reshape_142, full_int_array_0) + del reshape_142 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_2 = paddle._C_ops.subtract(unsqueeze_25, unsqueeze_26) + del unsqueeze_25, unsqueeze_26 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_2 = paddle._C_ops.not_equal(subtract_2, full_22) + + # pd_op.full: (4x49x49xf32) <- () + full_48 = paddle._C_ops.full( + [4, 49, 49], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_4 = paddle._C_ops.where(not_equal_2, full_48, subtract_2) + del not_equal_2, subtract_2 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_2 = paddle._C_ops.equal(where_4, full_22) + + # pd_op.full: (4x49x49xf32) <- () + full_49 = paddle._C_ops.full( + [4, 49, 49], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_5 = paddle._C_ops.where(equal_2, full_49, where_4) + del equal_2, where_4 + + # pd_op.shape64: (3xi64) <- (-1x49x384xf32) + shape64_36 = paddle._C_ops.shape64(reshape_49) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_58 = paddle._C_ops.slice( + shape64_36, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_36 + + # pd_op.matmul: (-1x49x1152xf32) <- (-1x49x384xf32, 384x1152xf32) + matmul_27 = paddle._C_ops.matmul(reshape_49, parameter_88, False, False) + del parameter_88 + + # pd_op.add: (-1x49x1152xf32) <- (-1x49x1152xf32, 1152xf32) + add_35 = paddle._C_ops.add(matmul_27, parameter_87) + del parameter_87 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_47 = [slice_58, full_17, full_18, full_45, full_19] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_45 = paddle._C_ops.stack(combine_47, 0) + del combine_47 + + # pd_op.reshape: (-1x49x3x12x32xf32) <- (-1x49x1152xf32, 5xi64) + reshape_143 = paddle._C_ops.reshape(add_35, stack_45) + del stack_45 + + # pd_op.transpose: (3x-1x12x49x32xf32) <- (-1x49x3x12x32xf32) + transpose_32 = paddle._C_ops.transpose(reshape_143, [2, 0, 3, 1, 4]) + del reshape_143 + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_59 = paddle._C_ops.slice( + transpose_32, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_60 = paddle._C_ops.slice( + transpose_32, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_5 = paddle._C_ops.slice( + transpose_32, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x12x49x32xf32) <- (-1x12x49x32xf32, 1xf32) + scale_5 = paddle._C_ops.scale(slice_59, full_0, float("0"), True) + del slice_59 + + # pd_op.transpose: (-1x12x32x49xf32) <- (-1x12x49x32xf32) + transpose_33 = paddle._C_ops.transpose(slice_60, [0, 1, 3, 2]) + del slice_60 + + # pd_op.matmul: (-1x12x49x49xf32) <- (-1x12x49x32xf32, -1x12x32x49xf32) + matmul_28 = paddle._C_ops.matmul(scale_5, transpose_33, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_50 = paddle._C_ops.reshape(data_6, full_int_array_12) + del data_6 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_5 = paddle._C_ops.index_select(data_7, reshape_50, 0) + del data_7 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_144 = paddle._C_ops.reshape(index_select_5, full_int_array_13) + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_34 = paddle._C_ops.transpose(reshape_144, [2, 0, 1]) + del reshape_144 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_7 = paddle._C_ops.unsqueeze(transpose_34, full_int_array_7) + + # pd_op.add: (-1x12x49x49xf32) <- (-1x12x49x49xf32, 1x12x49x49xf32) + add_36 = paddle._C_ops.add(matmul_28, unsqueeze_7) + + # pd_op.full: (xi64) <- () + full_50 = paddle._C_ops.full( + [], float("4"), paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_2 = paddle._C_ops.floor_divide(slice_58, full_50) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_48 = [floor_divide_2, full_34, full_45, full_17, full_17] + del floor_divide_2 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_46 = paddle._C_ops.stack(combine_48, 0) + del combine_48 + + # pd_op.reshape: (-1x4x12x49x49xf32) <- (-1x12x49x49xf32, 5xi64) + reshape_51 = paddle._C_ops.reshape(add_36, stack_46) + del stack_46 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_27 = paddle._C_ops.unsqueeze(where_5, full_int_array_8) + del where_5 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_8 = paddle._C_ops.unsqueeze(unsqueeze_27, full_int_array_7) + del unsqueeze_27 + + # pd_op.add: (-1x4x12x49x49xf32) <- (-1x4x12x49x49xf32, 1x4x1x49x49xf32) + add_37 = paddle._C_ops.add(reshape_51, unsqueeze_8) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_49 = [slice_58, full_45, full_17, full_17] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_47 = paddle._C_ops.stack(combine_49, 0) + del combine_49 + + # pd_op.reshape: (-1x12x49x49xf32) <- (-1x4x12x49x49xf32, 4xi64) + reshape_145 = paddle._C_ops.reshape(add_37, stack_47) + del stack_47 + + # pd_op.softmax: (-1x12x49x49xf32) <- (-1x12x49x49xf32) + softmax_5 = paddle._C_ops.softmax(reshape_145, -1) + del reshape_145 + + # pd_op.matmul: (-1x12x49x32xf32) <- (-1x12x49x49xf32, -1x12x49x32xf32) + matmul_69 = paddle._C_ops.matmul(softmax_5, slice_5, False, False) + + # pd_op.transpose: (-1x49x12x32xf32) <- (-1x12x49x32xf32) + transpose_35 = paddle._C_ops.transpose(matmul_69, [0, 2, 1, 3]) + del matmul_69 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_50 = [slice_58, full_17, full_31] + del slice_58 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_48 = paddle._C_ops.stack(combine_50, 0) + del combine_50 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x49x12x32xf32, 3xi64) + reshape_52 = paddle._C_ops.reshape(transpose_35, stack_48) + del stack_48 + + # pd_op.matmul: (-1x49x384xf32) <- (-1x49x384xf32, 384x384xf32) + matmul_29 = paddle._C_ops.matmul(reshape_52, parameter_86, False, False) + del parameter_86 + + # pd_op.add: (-1x49x384xf32) <- (-1x49x384xf32, 384xf32) + add_38 = paddle._C_ops.add(matmul_29, parameter_85) + del parameter_85 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x49x384xf32, 4xi64) + reshape_53 = paddle._C_ops.reshape(add_38, full_int_array_38) + + # pd_op.reshape: (-1x2x2x7x7x384xf32) <- (-1x7x7x384xf32, 6xi64) + reshape_146 = paddle._C_ops.reshape(reshape_53, full_int_array_40) + + # pd_op.transpose: (-1x2x7x2x7x384xf32) <- (-1x2x2x7x7x384xf32) + transpose_36 = paddle._C_ops.transpose(reshape_146, [0, 1, 3, 2, 4, 5]) + del reshape_146 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x2x7x2x7x384xf32, 4xi64) + reshape_54 = paddle._C_ops.reshape(transpose_36, full_int_array_41) + + # pd_op.roll: (-1x14x14x384xf32) <- (-1x14x14x384xf32, 2xi64) + roll_5 = paddle._C_ops.roll(reshape_54, full_int_array_3, [1, 2]) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_51 = [slice_55, full_46, full_31] + del slice_55 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_49 = paddle._C_ops.stack(combine_51, 0) + del combine_51 + + # pd_op.reshape: (-1x196x384xf32) <- (-1x14x14x384xf32, 3xi64) + reshape_55 = paddle._C_ops.reshape(roll_5, stack_49) + del stack_49 + + # pd_op.full: (xf32) <- () + full_6 = paddle._C_ops.full( + [], + float("0.909091"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_75 = full_6 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_37 = paddle._C_ops.shape64(reshape_55) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_61 = paddle._C_ops.slice( + shape64_37, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_37 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_52 = [slice_61, full_27, full_27] + del slice_61 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_50 = paddle._C_ops.stack(combine_52, 0) + del combine_52 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_8 = paddle._C_ops.uniform( + stack_50, + paddle.float32, + full_28, + full_29, + 0, + paddle.framework._current_expected_place(), + ) + del stack_50 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_97 = paddle._C_ops.add(full_6, uniform_8) + del uniform_8 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_8 = paddle._C_ops.floor(add_97) + del add_97 + + # pd_op.divide: (-1x196x384xf32) <- (-1x196x384xf32, xf32) + divide_8 = paddle._C_ops.divide(reshape_55, full_6) + + # pd_op.multiply: (-1x196x384xf32) <- (-1x196x384xf32, -1x1x1xf32) + multiply_8 = paddle._C_ops.multiply(divide_8, floor_8) + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_39 = paddle._C_ops.add(add_34, multiply_8) + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_42, layer_norm_43, layer_norm_44 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_39, parameter_84, parameter_83, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_83, parameter_84 + + # pd_op.matmul: (-1x196x1536xf32) <- (-1x196x384xf32, 384x1536xf32) + matmul_30 = paddle._C_ops.matmul(layer_norm_42, parameter_82, False, False) + del parameter_82 + + # pd_op.add: (-1x196x1536xf32) <- (-1x196x1536xf32, 1536xf32) + add_40 = paddle._C_ops.add(matmul_30, parameter_81) + del parameter_81 + + # pd_op.gelu: (-1x196x1536xf32) <- (-1x196x1536xf32) + gelu_5 = paddle._C_ops.gelu(add_40, False) + + # pd_op.matmul: (-1x196x384xf32) <- (-1x196x1536xf32, 1536x384xf32) + matmul_31 = paddle._C_ops.matmul(gelu_5, parameter_80, False, False) + del parameter_80 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, 384xf32) + add_41 = paddle._C_ops.add(matmul_31, parameter_79) + del parameter_79 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_38 = paddle._C_ops.shape64(add_41) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_62 = paddle._C_ops.slice( + shape64_38, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_38 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_53 = [slice_62, full_27, full_27] + del slice_62 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_51 = paddle._C_ops.stack(combine_53, 0) + del combine_53 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_9 = paddle._C_ops.uniform( + stack_51, + paddle.float32, + full_28, + full_29, + 0, + paddle.framework._current_expected_place(), + ) + del stack_51 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_98 = paddle._C_ops.add(full_6, uniform_9) + del uniform_9 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_9 = paddle._C_ops.floor(add_98) + del add_98 + + # pd_op.divide: (-1x196x384xf32) <- (-1x196x384xf32, xf32) + divide_9 = paddle._C_ops.divide(add_41, full_6) + + # pd_op.multiply: (-1x196x384xf32) <- (-1x196x384xf32, -1x1x1xf32) + multiply_9 = paddle._C_ops.multiply(divide_9, floor_9) + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_42 = paddle._C_ops.add(add_39, multiply_9) + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_39 = paddle._C_ops.shape64(add_42) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_63 = paddle._C_ops.slice( + shape64_39, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_39 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_45, layer_norm_46, layer_norm_47 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_42, parameter_78, parameter_77, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_77, parameter_78 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_54 = [slice_63, full_43, full_43, full_31] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_52 = paddle._C_ops.stack(combine_54, 0) + del combine_54 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x196x384xf32, 4xi64) + reshape_56 = paddle._C_ops.reshape(layer_norm_45, stack_52) + del stack_52 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_40 = paddle._C_ops.shape64(reshape_56) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_64 = paddle._C_ops.slice( + shape64_40, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_40 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_55 = [slice_64, full_44, full_16, full_44, full_16, full_31] + del slice_64 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_53 = paddle._C_ops.stack(combine_55, 0) + del combine_55 + + # pd_op.reshape: (-1x2x7x2x7x384xf32) <- (-1x14x14x384xf32, 6xi64) + reshape_147 = paddle._C_ops.reshape(reshape_56, stack_53) + del stack_53 + + # pd_op.transpose: (-1x2x2x7x7x384xf32) <- (-1x2x7x2x7x384xf32) + transpose_37 = paddle._C_ops.transpose(reshape_147, [0, 1, 3, 2, 4, 5]) + del reshape_147 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x2x2x7x7x384xf32, 4xi64) + reshape_57 = paddle._C_ops.reshape(transpose_37, full_int_array_38) + + # pd_op.reshape: (-1x49x384xf32) <- (-1x7x7x384xf32, 3xi64) + reshape_58 = paddle._C_ops.reshape(reshape_57, full_int_array_39) + + # pd_op.shape64: (3xi64) <- (-1x49x384xf32) + shape64_41 = paddle._C_ops.shape64(reshape_58) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_65 = paddle._C_ops.slice( + shape64_41, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_41 + + # pd_op.matmul: (-1x49x1152xf32) <- (-1x49x384xf32, 384x1152xf32) + matmul_32 = paddle._C_ops.matmul(reshape_58, parameter_76, False, False) + del parameter_76 + + # pd_op.add: (-1x49x1152xf32) <- (-1x49x1152xf32, 1152xf32) + add_43 = paddle._C_ops.add(matmul_32, parameter_75) + del parameter_75 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_56 = [slice_65, full_17, full_18, full_45, full_19] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_54 = paddle._C_ops.stack(combine_56, 0) + del combine_56 + + # pd_op.reshape: (-1x49x3x12x32xf32) <- (-1x49x1152xf32, 5xi64) + reshape_148 = paddle._C_ops.reshape(add_43, stack_54) + del stack_54 + + # pd_op.transpose: (3x-1x12x49x32xf32) <- (-1x49x3x12x32xf32) + transpose_38 = paddle._C_ops.transpose(reshape_148, [2, 0, 3, 1, 4]) + del reshape_148 + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_66 = paddle._C_ops.slice( + transpose_38, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_67 = paddle._C_ops.slice( + transpose_38, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_6 = paddle._C_ops.slice( + transpose_38, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x12x49x32xf32) <- (-1x12x49x32xf32, 1xf32) + scale_6 = paddle._C_ops.scale(slice_66, full_0, float("0"), True) + del slice_66 + + # pd_op.transpose: (-1x12x32x49xf32) <- (-1x12x49x32xf32) + transpose_39 = paddle._C_ops.transpose(slice_67, [0, 1, 3, 2]) + del slice_67 + + # pd_op.matmul: (-1x12x49x49xf32) <- (-1x12x49x32xf32, -1x12x32x49xf32) + matmul_33 = paddle._C_ops.matmul(scale_6, transpose_39, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_59 = paddle._C_ops.reshape(data_8, full_int_array_12) + del data_8 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_6 = paddle._C_ops.index_select(data_9, reshape_59, 0) + del data_9 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_149 = paddle._C_ops.reshape(index_select_6, full_int_array_13) + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_40 = paddle._C_ops.transpose(reshape_149, [2, 0, 1]) + del reshape_149 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_9 = paddle._C_ops.unsqueeze(transpose_40, full_int_array_7) + + # pd_op.add: (-1x12x49x49xf32) <- (-1x12x49x49xf32, 1x12x49x49xf32) + add_99 = paddle._C_ops.add(matmul_33, unsqueeze_9) + + # pd_op.softmax: (-1x12x49x49xf32) <- (-1x12x49x49xf32) + softmax_6 = paddle._C_ops.softmax(add_99, -1) + del add_99 + + # pd_op.matmul: (-1x12x49x32xf32) <- (-1x12x49x49xf32, -1x12x49x32xf32) + matmul_70 = paddle._C_ops.matmul(softmax_6, slice_6, False, False) + + # pd_op.transpose: (-1x49x12x32xf32) <- (-1x12x49x32xf32) + transpose_41 = paddle._C_ops.transpose(matmul_70, [0, 2, 1, 3]) + del matmul_70 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_57 = [slice_65, full_17, full_31] + del slice_65 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_55 = paddle._C_ops.stack(combine_57, 0) + del combine_57 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x49x12x32xf32, 3xi64) + reshape_60 = paddle._C_ops.reshape(transpose_41, stack_55) + del stack_55 + + # pd_op.matmul: (-1x49x384xf32) <- (-1x49x384xf32, 384x384xf32) + matmul_34 = paddle._C_ops.matmul(reshape_60, parameter_74, False, False) + del parameter_74 + + # pd_op.add: (-1x49x384xf32) <- (-1x49x384xf32, 384xf32) + add_44 = paddle._C_ops.add(matmul_34, parameter_73) + del parameter_73 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x49x384xf32, 4xi64) + reshape_61 = paddle._C_ops.reshape(add_44, full_int_array_38) + + # pd_op.reshape: (-1x2x2x7x7x384xf32) <- (-1x7x7x384xf32, 6xi64) + reshape_150 = paddle._C_ops.reshape(reshape_61, full_int_array_40) + + # pd_op.transpose: (-1x2x7x2x7x384xf32) <- (-1x2x2x7x7x384xf32) + transpose_42 = paddle._C_ops.transpose(reshape_150, [0, 1, 3, 2, 4, 5]) + del reshape_150 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x2x7x2x7x384xf32, 4xi64) + reshape_62 = paddle._C_ops.reshape(transpose_42, full_int_array_41) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_58 = [slice_63, full_46, full_31] + del slice_63 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_56 = paddle._C_ops.stack(combine_58, 0) + del combine_58 + + # pd_op.reshape: (-1x196x384xf32) <- (-1x14x14x384xf32, 3xi64) + reshape_63 = paddle._C_ops.reshape(reshape_62, stack_56) + del stack_56 + + # pd_op.full: (xf32) <- () + full_7 = paddle._C_ops.full( + [], + float("0.890909"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_84 = full_7 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_42 = paddle._C_ops.shape64(reshape_63) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_68 = paddle._C_ops.slice( + shape64_42, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_42 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_59 = [slice_68, full_27, full_27] + del slice_68 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_57 = paddle._C_ops.stack(combine_59, 0) + del combine_59 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_10 = paddle._C_ops.uniform( + stack_57, + paddle.float32, + full_28, + full_29, + 0, + paddle.framework._current_expected_place(), + ) + del stack_57 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_100 = paddle._C_ops.add(full_7, uniform_10) + del uniform_10 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_10 = paddle._C_ops.floor(add_100) + del add_100 + + # pd_op.divide: (-1x196x384xf32) <- (-1x196x384xf32, xf32) + divide_10 = paddle._C_ops.divide(reshape_63, full_7) + + # pd_op.multiply: (-1x196x384xf32) <- (-1x196x384xf32, -1x1x1xf32) + multiply_10 = paddle._C_ops.multiply(divide_10, floor_10) + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_45 = paddle._C_ops.add(add_42, multiply_10) + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_48, layer_norm_49, layer_norm_50 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_45, parameter_72, parameter_71, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_71, parameter_72 + + # pd_op.matmul: (-1x196x1536xf32) <- (-1x196x384xf32, 384x1536xf32) + matmul_35 = paddle._C_ops.matmul(layer_norm_48, parameter_70, False, False) + del parameter_70 + + # pd_op.add: (-1x196x1536xf32) <- (-1x196x1536xf32, 1536xf32) + add_46 = paddle._C_ops.add(matmul_35, parameter_69) + del parameter_69 + + # pd_op.gelu: (-1x196x1536xf32) <- (-1x196x1536xf32) + gelu_6 = paddle._C_ops.gelu(add_46, False) + + # pd_op.matmul: (-1x196x384xf32) <- (-1x196x1536xf32, 1536x384xf32) + matmul_36 = paddle._C_ops.matmul(gelu_6, parameter_68, False, False) + del parameter_68 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, 384xf32) + add_47 = paddle._C_ops.add(matmul_36, parameter_67) + del parameter_67 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_43 = paddle._C_ops.shape64(add_47) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_69 = paddle._C_ops.slice( + shape64_43, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_43 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_60 = [slice_69, full_27, full_27] + del slice_69 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_58 = paddle._C_ops.stack(combine_60, 0) + del combine_60 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_11 = paddle._C_ops.uniform( + stack_58, + paddle.float32, + full_28, + full_29, + 0, + paddle.framework._current_expected_place(), + ) + del stack_58 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_101 = paddle._C_ops.add(full_7, uniform_11) + del uniform_11 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_11 = paddle._C_ops.floor(add_101) + del add_101 + + # pd_op.divide: (-1x196x384xf32) <- (-1x196x384xf32, xf32) + divide_11 = paddle._C_ops.divide(add_47, full_7) + + # pd_op.multiply: (-1x196x384xf32) <- (-1x196x384xf32, -1x1x1xf32) + multiply_11 = paddle._C_ops.multiply(divide_11, floor_11) + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_48 = paddle._C_ops.add(add_45, multiply_11) + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_44 = paddle._C_ops.shape64(add_48) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_70 = paddle._C_ops.slice( + shape64_44, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_44 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_51, layer_norm_52, layer_norm_53 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_48, parameter_66, parameter_65, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_65, parameter_66 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_61 = [slice_70, full_43, full_43, full_31] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_59 = paddle._C_ops.stack(combine_61, 0) + del combine_61 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x196x384xf32, 4xi64) + reshape_64 = paddle._C_ops.reshape(layer_norm_51, stack_59) + del stack_59 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_45 = paddle._C_ops.shape64(reshape_64) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_71 = paddle._C_ops.slice( + shape64_45, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_45 + + # pd_op.roll: (-1x14x14x384xf32) <- (-1x14x14x384xf32, 2xi64) + roll_6 = paddle._C_ops.roll(reshape_64, full_int_array_2, [1, 2]) + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_46 = paddle._C_ops.shape64(roll_6) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_72 = paddle._C_ops.slice( + shape64_46, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_46 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_62 = [slice_72, full_44, full_16, full_44, full_16, full_31] + del slice_72 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_60 = paddle._C_ops.stack(combine_62, 0) + del combine_62 + + # pd_op.reshape: (-1x2x7x2x7x384xf32) <- (-1x14x14x384xf32, 6xi64) + reshape_151 = paddle._C_ops.reshape(roll_6, stack_60) + del stack_60 + + # pd_op.transpose: (-1x2x2x7x7x384xf32) <- (-1x2x7x2x7x384xf32) + transpose_43 = paddle._C_ops.transpose(reshape_151, [0, 1, 3, 2, 4, 5]) + del reshape_151 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x2x2x7x7x384xf32, 4xi64) + reshape_65 = paddle._C_ops.reshape(transpose_43, full_int_array_38) + + # pd_op.reshape: (-1x49x384xf32) <- (-1x7x7x384xf32, 3xi64) + reshape_66 = paddle._C_ops.reshape(reshape_65, full_int_array_39) + + # pd_op.full: (1x14x14x1xf32) <- () + full_51 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__30 = paddle._C_ops.set_value_( + full_51, + full_int_array_16, + full_int_array_17, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_51 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__31 = paddle._C_ops.set_value_( + set_value__30, + full_int_array_19, + full_int_array_20, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__30 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__32 = paddle._C_ops.set_value_( + set_value__31, + full_int_array_21, + full_int_array_22, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__31 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__33 = paddle._C_ops.set_value_( + set_value__32, + full_int_array_23, + full_int_array_24, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__32 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__34 = paddle._C_ops.set_value_( + set_value__33, + full_int_array_17, + full_int_array_2, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__33 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__35 = paddle._C_ops.set_value_( + set_value__34, + full_int_array_20, + full_int_array_25, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__34 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__36 = paddle._C_ops.set_value_( + set_value__35, + full_int_array_26, + full_int_array_27, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__35 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__37 = paddle._C_ops.set_value_( + set_value__36, + full_int_array_24, + full_int_array_28, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__36 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__3 = paddle._C_ops.set_value_( + set_value__37, + full_int_array_2, + full_int_array_29, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__37 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_152 = paddle._C_ops.reshape(set_value__3, full_int_array_42) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_77 = paddle._C_ops.transpose(reshape_152, [0, 1, 3, 2, 4, 5]) + del reshape_152 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_153 = paddle._C_ops.reshape(transpose_77, full_int_array_31) + del transpose_77 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_154 = paddle._C_ops.reshape(reshape_153, full_int_array_32) + del reshape_153 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_28 = paddle._C_ops.unsqueeze(reshape_154, full_int_array_8) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_29 = paddle._C_ops.unsqueeze(reshape_154, full_int_array_0) + del reshape_154 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_3 = paddle._C_ops.subtract(unsqueeze_28, unsqueeze_29) + del unsqueeze_28, unsqueeze_29 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_3 = paddle._C_ops.not_equal(subtract_3, full_22) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_6 = paddle._C_ops.where(not_equal_3, full_48, subtract_3) + del not_equal_3, subtract_3 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_3 = paddle._C_ops.equal(where_6, full_22) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_7 = paddle._C_ops.where(equal_3, full_49, where_6) + del equal_3, where_6 + + # pd_op.shape64: (3xi64) <- (-1x49x384xf32) + shape64_47 = paddle._C_ops.shape64(reshape_66) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_73 = paddle._C_ops.slice( + shape64_47, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_47 + + # pd_op.matmul: (-1x49x1152xf32) <- (-1x49x384xf32, 384x1152xf32) + matmul_37 = paddle._C_ops.matmul(reshape_66, parameter_64, False, False) + del parameter_64 + + # pd_op.add: (-1x49x1152xf32) <- (-1x49x1152xf32, 1152xf32) + add_49 = paddle._C_ops.add(matmul_37, parameter_63) + del parameter_63 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_63 = [slice_73, full_17, full_18, full_45, full_19] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_61 = paddle._C_ops.stack(combine_63, 0) + del combine_63 + + # pd_op.reshape: (-1x49x3x12x32xf32) <- (-1x49x1152xf32, 5xi64) + reshape_155 = paddle._C_ops.reshape(add_49, stack_61) + del stack_61 + + # pd_op.transpose: (3x-1x12x49x32xf32) <- (-1x49x3x12x32xf32) + transpose_44 = paddle._C_ops.transpose(reshape_155, [2, 0, 3, 1, 4]) + del reshape_155 + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_74 = paddle._C_ops.slice( + transpose_44, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_75 = paddle._C_ops.slice( + transpose_44, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_7 = paddle._C_ops.slice( + transpose_44, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x12x49x32xf32) <- (-1x12x49x32xf32, 1xf32) + scale_7 = paddle._C_ops.scale(slice_74, full_0, float("0"), True) + del slice_74 + + # pd_op.transpose: (-1x12x32x49xf32) <- (-1x12x49x32xf32) + transpose_45 = paddle._C_ops.transpose(slice_75, [0, 1, 3, 2]) + del slice_75 + + # pd_op.matmul: (-1x12x49x49xf32) <- (-1x12x49x32xf32, -1x12x32x49xf32) + matmul_38 = paddle._C_ops.matmul(scale_7, transpose_45, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_67 = paddle._C_ops.reshape(data_10, full_int_array_12) + del data_10 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_7 = paddle._C_ops.index_select(data_11, reshape_67, 0) + del data_11 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_156 = paddle._C_ops.reshape(index_select_7, full_int_array_13) + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_46 = paddle._C_ops.transpose(reshape_156, [2, 0, 1]) + del reshape_156 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_10 = paddle._C_ops.unsqueeze(transpose_46, full_int_array_7) + + # pd_op.add: (-1x12x49x49xf32) <- (-1x12x49x49xf32, 1x12x49x49xf32) + add_50 = paddle._C_ops.add(matmul_38, unsqueeze_10) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_3 = paddle._C_ops.floor_divide(slice_73, full_50) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_64 = [floor_divide_3, full_34, full_45, full_17, full_17] + del floor_divide_3 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_62 = paddle._C_ops.stack(combine_64, 0) + del combine_64 + + # pd_op.reshape: (-1x4x12x49x49xf32) <- (-1x12x49x49xf32, 5xi64) + reshape_68 = paddle._C_ops.reshape(add_50, stack_62) + del stack_62 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_30 = paddle._C_ops.unsqueeze(where_7, full_int_array_8) + del where_7 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_11 = paddle._C_ops.unsqueeze(unsqueeze_30, full_int_array_7) + del unsqueeze_30 + + # pd_op.add: (-1x4x12x49x49xf32) <- (-1x4x12x49x49xf32, 1x4x1x49x49xf32) + add_51 = paddle._C_ops.add(reshape_68, unsqueeze_11) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_65 = [slice_73, full_45, full_17, full_17] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_63 = paddle._C_ops.stack(combine_65, 0) + del combine_65 + + # pd_op.reshape: (-1x12x49x49xf32) <- (-1x4x12x49x49xf32, 4xi64) + reshape_157 = paddle._C_ops.reshape(add_51, stack_63) + del stack_63 + + # pd_op.softmax: (-1x12x49x49xf32) <- (-1x12x49x49xf32) + softmax_7 = paddle._C_ops.softmax(reshape_157, -1) + del reshape_157 + + # pd_op.matmul: (-1x12x49x32xf32) <- (-1x12x49x49xf32, -1x12x49x32xf32) + matmul_71 = paddle._C_ops.matmul(softmax_7, slice_7, False, False) + + # pd_op.transpose: (-1x49x12x32xf32) <- (-1x12x49x32xf32) + transpose_47 = paddle._C_ops.transpose(matmul_71, [0, 2, 1, 3]) + del matmul_71 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_66 = [slice_73, full_17, full_31] + del slice_73 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_64 = paddle._C_ops.stack(combine_66, 0) + del combine_66 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x49x12x32xf32, 3xi64) + reshape_69 = paddle._C_ops.reshape(transpose_47, stack_64) + del stack_64 + + # pd_op.matmul: (-1x49x384xf32) <- (-1x49x384xf32, 384x384xf32) + matmul_39 = paddle._C_ops.matmul(reshape_69, parameter_62, False, False) + del parameter_62 + + # pd_op.add: (-1x49x384xf32) <- (-1x49x384xf32, 384xf32) + add_52 = paddle._C_ops.add(matmul_39, parameter_61) + del parameter_61 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x49x384xf32, 4xi64) + reshape_70 = paddle._C_ops.reshape(add_52, full_int_array_38) + + # pd_op.reshape: (-1x2x2x7x7x384xf32) <- (-1x7x7x384xf32, 6xi64) + reshape_158 = paddle._C_ops.reshape(reshape_70, full_int_array_40) + + # pd_op.transpose: (-1x2x7x2x7x384xf32) <- (-1x2x2x7x7x384xf32) + transpose_48 = paddle._C_ops.transpose(reshape_158, [0, 1, 3, 2, 4, 5]) + del reshape_158 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x2x7x2x7x384xf32, 4xi64) + reshape_71 = paddle._C_ops.reshape(transpose_48, full_int_array_41) + + # pd_op.roll: (-1x14x14x384xf32) <- (-1x14x14x384xf32, 2xi64) + roll_7 = paddle._C_ops.roll(reshape_71, full_int_array_3, [1, 2]) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_67 = [slice_70, full_46, full_31] + del slice_70 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_65 = paddle._C_ops.stack(combine_67, 0) + del combine_67 + + # pd_op.reshape: (-1x196x384xf32) <- (-1x14x14x384xf32, 3xi64) + reshape_72 = paddle._C_ops.reshape(roll_7, stack_65) + del stack_65 + + # pd_op.full: (xf32) <- () + full_8 = paddle._C_ops.full( + [], + float("0.872727"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_95 = full_8 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_48 = paddle._C_ops.shape64(reshape_72) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_76 = paddle._C_ops.slice( + shape64_48, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_48 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_68 = [slice_76, full_27, full_27] + del slice_76 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_66 = paddle._C_ops.stack(combine_68, 0) + del combine_68 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_12 = paddle._C_ops.uniform( + stack_66, + paddle.float32, + full_28, + full_29, + 0, + paddle.framework._current_expected_place(), + ) + del stack_66 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_102 = paddle._C_ops.add(full_8, uniform_12) + del uniform_12 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_12 = paddle._C_ops.floor(add_102) + del add_102 + + # pd_op.divide: (-1x196x384xf32) <- (-1x196x384xf32, xf32) + divide_12 = paddle._C_ops.divide(reshape_72, full_8) + + # pd_op.multiply: (-1x196x384xf32) <- (-1x196x384xf32, -1x1x1xf32) + multiply_12 = paddle._C_ops.multiply(divide_12, floor_12) + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_53 = paddle._C_ops.add(add_48, multiply_12) + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_54, layer_norm_55, layer_norm_56 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_53, parameter_60, parameter_59, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_59, parameter_60 + + # pd_op.matmul: (-1x196x1536xf32) <- (-1x196x384xf32, 384x1536xf32) + matmul_40 = paddle._C_ops.matmul(layer_norm_54, parameter_58, False, False) + del parameter_58 + + # pd_op.add: (-1x196x1536xf32) <- (-1x196x1536xf32, 1536xf32) + add_54 = paddle._C_ops.add(matmul_40, parameter_57) + del parameter_57 + + # pd_op.gelu: (-1x196x1536xf32) <- (-1x196x1536xf32) + gelu_7 = paddle._C_ops.gelu(add_54, False) + + # pd_op.matmul: (-1x196x384xf32) <- (-1x196x1536xf32, 1536x384xf32) + matmul_41 = paddle._C_ops.matmul(gelu_7, parameter_56, False, False) + del parameter_56 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, 384xf32) + add_55 = paddle._C_ops.add(matmul_41, parameter_55) + del parameter_55 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_49 = paddle._C_ops.shape64(add_55) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_77 = paddle._C_ops.slice( + shape64_49, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_49 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_69 = [slice_77, full_27, full_27] + del slice_77 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_67 = paddle._C_ops.stack(combine_69, 0) + del combine_69 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_13 = paddle._C_ops.uniform( + stack_67, + paddle.float32, + full_28, + full_29, + 0, + paddle.framework._current_expected_place(), + ) + del stack_67 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_103 = paddle._C_ops.add(full_8, uniform_13) + del uniform_13 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_13 = paddle._C_ops.floor(add_103) + del add_103 + + # pd_op.divide: (-1x196x384xf32) <- (-1x196x384xf32, xf32) + divide_13 = paddle._C_ops.divide(add_55, full_8) + + # pd_op.multiply: (-1x196x384xf32) <- (-1x196x384xf32, -1x1x1xf32) + multiply_13 = paddle._C_ops.multiply(divide_13, floor_13) + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_56 = paddle._C_ops.add(add_53, multiply_13) + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_50 = paddle._C_ops.shape64(add_56) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_78 = paddle._C_ops.slice( + shape64_50, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_50 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_57, layer_norm_58, layer_norm_59 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_56, parameter_54, parameter_53, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_53, parameter_54 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_70 = [slice_78, full_43, full_43, full_31] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_68 = paddle._C_ops.stack(combine_70, 0) + del combine_70 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x196x384xf32, 4xi64) + reshape_73 = paddle._C_ops.reshape(layer_norm_57, stack_68) + del stack_68 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_51 = paddle._C_ops.shape64(reshape_73) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_79 = paddle._C_ops.slice( + shape64_51, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_51 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_71 = [slice_79, full_44, full_16, full_44, full_16, full_31] + del slice_79 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_69 = paddle._C_ops.stack(combine_71, 0) + del combine_71 + + # pd_op.reshape: (-1x2x7x2x7x384xf32) <- (-1x14x14x384xf32, 6xi64) + reshape_159 = paddle._C_ops.reshape(reshape_73, stack_69) + del stack_69 + + # pd_op.transpose: (-1x2x2x7x7x384xf32) <- (-1x2x7x2x7x384xf32) + transpose_49 = paddle._C_ops.transpose(reshape_159, [0, 1, 3, 2, 4, 5]) + del reshape_159 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x2x2x7x7x384xf32, 4xi64) + reshape_74 = paddle._C_ops.reshape(transpose_49, full_int_array_38) + + # pd_op.reshape: (-1x49x384xf32) <- (-1x7x7x384xf32, 3xi64) + reshape_75 = paddle._C_ops.reshape(reshape_74, full_int_array_39) + + # pd_op.shape64: (3xi64) <- (-1x49x384xf32) + shape64_52 = paddle._C_ops.shape64(reshape_75) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_80 = paddle._C_ops.slice( + shape64_52, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_52 + + # pd_op.matmul: (-1x49x1152xf32) <- (-1x49x384xf32, 384x1152xf32) + matmul_42 = paddle._C_ops.matmul(reshape_75, parameter_52, False, False) + del parameter_52 + + # pd_op.add: (-1x49x1152xf32) <- (-1x49x1152xf32, 1152xf32) + add_57 = paddle._C_ops.add(matmul_42, parameter_51) + del parameter_51 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_72 = [slice_80, full_17, full_18, full_45, full_19] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_70 = paddle._C_ops.stack(combine_72, 0) + del combine_72 + + # pd_op.reshape: (-1x49x3x12x32xf32) <- (-1x49x1152xf32, 5xi64) + reshape_160 = paddle._C_ops.reshape(add_57, stack_70) + del stack_70 + + # pd_op.transpose: (3x-1x12x49x32xf32) <- (-1x49x3x12x32xf32) + transpose_50 = paddle._C_ops.transpose(reshape_160, [2, 0, 3, 1, 4]) + del reshape_160 + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_81 = paddle._C_ops.slice( + transpose_50, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_82 = paddle._C_ops.slice( + transpose_50, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_8 = paddle._C_ops.slice( + transpose_50, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x12x49x32xf32) <- (-1x12x49x32xf32, 1xf32) + scale_8 = paddle._C_ops.scale(slice_81, full_0, float("0"), True) + del slice_81 + + # pd_op.transpose: (-1x12x32x49xf32) <- (-1x12x49x32xf32) + transpose_51 = paddle._C_ops.transpose(slice_82, [0, 1, 3, 2]) + del slice_82 + + # pd_op.matmul: (-1x12x49x49xf32) <- (-1x12x49x32xf32, -1x12x32x49xf32) + matmul_43 = paddle._C_ops.matmul(scale_8, transpose_51, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_76 = paddle._C_ops.reshape(data_12, full_int_array_12) + del data_12 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_8 = paddle._C_ops.index_select(data_13, reshape_76, 0) + del data_13 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_161 = paddle._C_ops.reshape(index_select_8, full_int_array_13) + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_52 = paddle._C_ops.transpose(reshape_161, [2, 0, 1]) + del reshape_161 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_12 = paddle._C_ops.unsqueeze(transpose_52, full_int_array_7) + + # pd_op.add: (-1x12x49x49xf32) <- (-1x12x49x49xf32, 1x12x49x49xf32) + add_104 = paddle._C_ops.add(matmul_43, unsqueeze_12) + + # pd_op.softmax: (-1x12x49x49xf32) <- (-1x12x49x49xf32) + softmax_8 = paddle._C_ops.softmax(add_104, -1) + del add_104 + + # pd_op.matmul: (-1x12x49x32xf32) <- (-1x12x49x49xf32, -1x12x49x32xf32) + matmul_72 = paddle._C_ops.matmul(softmax_8, slice_8, False, False) + + # pd_op.transpose: (-1x49x12x32xf32) <- (-1x12x49x32xf32) + transpose_53 = paddle._C_ops.transpose(matmul_72, [0, 2, 1, 3]) + del matmul_72 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_73 = [slice_80, full_17, full_31] + del slice_80 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_71 = paddle._C_ops.stack(combine_73, 0) + del combine_73 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x49x12x32xf32, 3xi64) + reshape_77 = paddle._C_ops.reshape(transpose_53, stack_71) + del stack_71 + + # pd_op.matmul: (-1x49x384xf32) <- (-1x49x384xf32, 384x384xf32) + matmul_44 = paddle._C_ops.matmul(reshape_77, parameter_50, False, False) + del parameter_50 + + # pd_op.add: (-1x49x384xf32) <- (-1x49x384xf32, 384xf32) + add_58 = paddle._C_ops.add(matmul_44, parameter_49) + del parameter_49 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x49x384xf32, 4xi64) + reshape_78 = paddle._C_ops.reshape(add_58, full_int_array_38) + + # pd_op.reshape: (-1x2x2x7x7x384xf32) <- (-1x7x7x384xf32, 6xi64) + reshape_162 = paddle._C_ops.reshape(reshape_78, full_int_array_40) + + # pd_op.transpose: (-1x2x7x2x7x384xf32) <- (-1x2x2x7x7x384xf32) + transpose_54 = paddle._C_ops.transpose(reshape_162, [0, 1, 3, 2, 4, 5]) + del reshape_162 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x2x7x2x7x384xf32, 4xi64) + reshape_79 = paddle._C_ops.reshape(transpose_54, full_int_array_41) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_74 = [slice_78, full_46, full_31] + del slice_78 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_72 = paddle._C_ops.stack(combine_74, 0) + del combine_74 + + # pd_op.reshape: (-1x196x384xf32) <- (-1x14x14x384xf32, 3xi64) + reshape_80 = paddle._C_ops.reshape(reshape_79, stack_72) + del stack_72 + + # pd_op.full: (xf32) <- () + full_9 = paddle._C_ops.full( + [], + float("0.854545"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_104 = full_9 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_53 = paddle._C_ops.shape64(reshape_80) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_83 = paddle._C_ops.slice( + shape64_53, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_53 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_75 = [slice_83, full_27, full_27] + del slice_83 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_73 = paddle._C_ops.stack(combine_75, 0) + del combine_75 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_14 = paddle._C_ops.uniform( + stack_73, + paddle.float32, + full_28, + full_29, + 0, + paddle.framework._current_expected_place(), + ) + del stack_73 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_105 = paddle._C_ops.add(full_9, uniform_14) + del uniform_14 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_14 = paddle._C_ops.floor(add_105) + del add_105 + + # pd_op.divide: (-1x196x384xf32) <- (-1x196x384xf32, xf32) + divide_14 = paddle._C_ops.divide(reshape_80, full_9) + + # pd_op.multiply: (-1x196x384xf32) <- (-1x196x384xf32, -1x1x1xf32) + multiply_14 = paddle._C_ops.multiply(divide_14, floor_14) + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_59 = paddle._C_ops.add(add_56, multiply_14) + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_60, layer_norm_61, layer_norm_62 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_59, parameter_48, parameter_47, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_47, parameter_48 + + # pd_op.matmul: (-1x196x1536xf32) <- (-1x196x384xf32, 384x1536xf32) + matmul_45 = paddle._C_ops.matmul(layer_norm_60, parameter_46, False, False) + del parameter_46 + + # pd_op.add: (-1x196x1536xf32) <- (-1x196x1536xf32, 1536xf32) + add_60 = paddle._C_ops.add(matmul_45, parameter_45) + del parameter_45 + + # pd_op.gelu: (-1x196x1536xf32) <- (-1x196x1536xf32) + gelu_8 = paddle._C_ops.gelu(add_60, False) + + # pd_op.matmul: (-1x196x384xf32) <- (-1x196x1536xf32, 1536x384xf32) + matmul_46 = paddle._C_ops.matmul(gelu_8, parameter_44, False, False) + del parameter_44 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, 384xf32) + add_61 = paddle._C_ops.add(matmul_46, parameter_43) + del parameter_43 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_54 = paddle._C_ops.shape64(add_61) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_84 = paddle._C_ops.slice( + shape64_54, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_54 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_76 = [slice_84, full_27, full_27] + del slice_84 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_74 = paddle._C_ops.stack(combine_76, 0) + del combine_76 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_15 = paddle._C_ops.uniform( + stack_74, + paddle.float32, + full_28, + full_29, + 0, + paddle.framework._current_expected_place(), + ) + del stack_74 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_106 = paddle._C_ops.add(full_9, uniform_15) + del uniform_15 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_15 = paddle._C_ops.floor(add_106) + del add_106 + + # pd_op.divide: (-1x196x384xf32) <- (-1x196x384xf32, xf32) + divide_15 = paddle._C_ops.divide(add_61, full_9) + + # pd_op.multiply: (-1x196x384xf32) <- (-1x196x384xf32, -1x1x1xf32) + multiply_15 = paddle._C_ops.multiply(divide_15, floor_15) + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_62 = paddle._C_ops.add(add_59, multiply_15) + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_55 = paddle._C_ops.shape64(add_62) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_85 = paddle._C_ops.slice( + shape64_55, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_55 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_63, layer_norm_64, layer_norm_65 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_62, parameter_42, parameter_41, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_41, parameter_42 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_77 = [slice_85, full_43, full_43, full_31] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_75 = paddle._C_ops.stack(combine_77, 0) + del combine_77 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x196x384xf32, 4xi64) + reshape_81 = paddle._C_ops.reshape(layer_norm_63, stack_75) + del stack_75 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_56 = paddle._C_ops.shape64(reshape_81) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_86 = paddle._C_ops.slice( + shape64_56, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_56 + + # pd_op.roll: (-1x14x14x384xf32) <- (-1x14x14x384xf32, 2xi64) + roll_8 = paddle._C_ops.roll(reshape_81, full_int_array_2, [1, 2]) + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_57 = paddle._C_ops.shape64(roll_8) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_87 = paddle._C_ops.slice( + shape64_57, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_57 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_78 = [slice_87, full_44, full_16, full_44, full_16, full_31] + del full_44, slice_87 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_76 = paddle._C_ops.stack(combine_78, 0) + del combine_78 + + # pd_op.reshape: (-1x2x7x2x7x384xf32) <- (-1x14x14x384xf32, 6xi64) + reshape_163 = paddle._C_ops.reshape(roll_8, stack_76) + del stack_76 + + # pd_op.transpose: (-1x2x2x7x7x384xf32) <- (-1x2x7x2x7x384xf32) + transpose_55 = paddle._C_ops.transpose(reshape_163, [0, 1, 3, 2, 4, 5]) + del reshape_163 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x2x2x7x7x384xf32, 4xi64) + reshape_82 = paddle._C_ops.reshape(transpose_55, full_int_array_38) + + # pd_op.reshape: (-1x49x384xf32) <- (-1x7x7x384xf32, 3xi64) + reshape_83 = paddle._C_ops.reshape(reshape_82, full_int_array_39) + del full_int_array_39 + + # pd_op.full: (1x14x14x1xf32) <- () + full_52 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__38 = paddle._C_ops.set_value_( + full_52, + full_int_array_16, + full_int_array_17, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_52 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__39 = paddle._C_ops.set_value_( + set_value__38, + full_int_array_19, + full_int_array_20, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__38 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__40 = paddle._C_ops.set_value_( + set_value__39, + full_int_array_21, + full_int_array_22, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__39 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__41 = paddle._C_ops.set_value_( + set_value__40, + full_int_array_23, + full_int_array_24, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__40 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__42 = paddle._C_ops.set_value_( + set_value__41, + full_int_array_17, + full_int_array_2, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__41 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__43 = paddle._C_ops.set_value_( + set_value__42, + full_int_array_20, + full_int_array_25, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__42 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__44 = paddle._C_ops.set_value_( + set_value__43, + full_int_array_26, + full_int_array_27, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__43 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__45 = paddle._C_ops.set_value_( + set_value__44, + full_int_array_24, + full_int_array_28, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__44 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__4 = paddle._C_ops.set_value_( + set_value__45, + full_int_array_2, + full_int_array_29, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__45 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_164 = paddle._C_ops.reshape(set_value__4, full_int_array_42) + del full_int_array_42 + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_78 = paddle._C_ops.transpose(reshape_164, [0, 1, 3, 2, 4, 5]) + del reshape_164 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_165 = paddle._C_ops.reshape(transpose_78, full_int_array_31) + del transpose_78 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_166 = paddle._C_ops.reshape(reshape_165, full_int_array_32) + del reshape_165 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_31 = paddle._C_ops.unsqueeze(reshape_166, full_int_array_8) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_32 = paddle._C_ops.unsqueeze(reshape_166, full_int_array_0) + del reshape_166 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_4 = paddle._C_ops.subtract(unsqueeze_31, unsqueeze_32) + del unsqueeze_31, unsqueeze_32 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_4 = paddle._C_ops.not_equal(subtract_4, full_22) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_8 = paddle._C_ops.where(not_equal_4, full_48, subtract_4) + del full_48, not_equal_4, subtract_4 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_4 = paddle._C_ops.equal(where_8, full_22) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_9 = paddle._C_ops.where(equal_4, full_49, where_8) + del equal_4, full_49, where_8 + + # pd_op.shape64: (3xi64) <- (-1x49x384xf32) + shape64_58 = paddle._C_ops.shape64(reshape_83) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_88 = paddle._C_ops.slice( + shape64_58, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_58 + + # pd_op.matmul: (-1x49x1152xf32) <- (-1x49x384xf32, 384x1152xf32) + matmul_47 = paddle._C_ops.matmul(reshape_83, parameter_40, False, False) + del parameter_40 + + # pd_op.add: (-1x49x1152xf32) <- (-1x49x1152xf32, 1152xf32) + add_63 = paddle._C_ops.add(matmul_47, parameter_39) + del parameter_39 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_79 = [slice_88, full_17, full_18, full_45, full_19] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_77 = paddle._C_ops.stack(combine_79, 0) + del combine_79 + + # pd_op.reshape: (-1x49x3x12x32xf32) <- (-1x49x1152xf32, 5xi64) + reshape_167 = paddle._C_ops.reshape(add_63, stack_77) + del stack_77 + + # pd_op.transpose: (3x-1x12x49x32xf32) <- (-1x49x3x12x32xf32) + transpose_56 = paddle._C_ops.transpose(reshape_167, [2, 0, 3, 1, 4]) + del reshape_167 + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_89 = paddle._C_ops.slice( + transpose_56, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_90 = paddle._C_ops.slice( + transpose_56, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_9 = paddle._C_ops.slice( + transpose_56, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x12x49x32xf32) <- (-1x12x49x32xf32, 1xf32) + scale_9 = paddle._C_ops.scale(slice_89, full_0, float("0"), True) + del slice_89 + + # pd_op.transpose: (-1x12x32x49xf32) <- (-1x12x49x32xf32) + transpose_57 = paddle._C_ops.transpose(slice_90, [0, 1, 3, 2]) + del slice_90 + + # pd_op.matmul: (-1x12x49x49xf32) <- (-1x12x49x32xf32, -1x12x32x49xf32) + matmul_48 = paddle._C_ops.matmul(scale_9, transpose_57, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_84 = paddle._C_ops.reshape(data_14, full_int_array_12) + del data_14 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_9 = paddle._C_ops.index_select(data_15, reshape_84, 0) + del data_15 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_168 = paddle._C_ops.reshape(index_select_9, full_int_array_13) + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_58 = paddle._C_ops.transpose(reshape_168, [2, 0, 1]) + del reshape_168 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_13 = paddle._C_ops.unsqueeze(transpose_58, full_int_array_7) + + # pd_op.add: (-1x12x49x49xf32) <- (-1x12x49x49xf32, 1x12x49x49xf32) + add_64 = paddle._C_ops.add(matmul_48, unsqueeze_13) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_4 = paddle._C_ops.floor_divide(slice_88, full_50) + del full_50 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_80 = [floor_divide_4, full_34, full_45, full_17, full_17] + del floor_divide_4, full_34 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_78 = paddle._C_ops.stack(combine_80, 0) + del combine_80 + + # pd_op.reshape: (-1x4x12x49x49xf32) <- (-1x12x49x49xf32, 5xi64) + reshape_85 = paddle._C_ops.reshape(add_64, stack_78) + del stack_78 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_33 = paddle._C_ops.unsqueeze(where_9, full_int_array_8) + del where_9 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_14 = paddle._C_ops.unsqueeze(unsqueeze_33, full_int_array_7) + del unsqueeze_33 + + # pd_op.add: (-1x4x12x49x49xf32) <- (-1x4x12x49x49xf32, 1x4x1x49x49xf32) + add_65 = paddle._C_ops.add(reshape_85, unsqueeze_14) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_81 = [slice_88, full_45, full_17, full_17] + del full_45 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_79 = paddle._C_ops.stack(combine_81, 0) + del combine_81 + + # pd_op.reshape: (-1x12x49x49xf32) <- (-1x4x12x49x49xf32, 4xi64) + reshape_169 = paddle._C_ops.reshape(add_65, stack_79) + del stack_79 + + # pd_op.softmax: (-1x12x49x49xf32) <- (-1x12x49x49xf32) + softmax_9 = paddle._C_ops.softmax(reshape_169, -1) + del reshape_169 + + # pd_op.matmul: (-1x12x49x32xf32) <- (-1x12x49x49xf32, -1x12x49x32xf32) + matmul_73 = paddle._C_ops.matmul(softmax_9, slice_9, False, False) + + # pd_op.transpose: (-1x49x12x32xf32) <- (-1x12x49x32xf32) + transpose_59 = paddle._C_ops.transpose(matmul_73, [0, 2, 1, 3]) + del matmul_73 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_82 = [slice_88, full_17, full_31] + del slice_88 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_80 = paddle._C_ops.stack(combine_82, 0) + del combine_82 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x49x12x32xf32, 3xi64) + reshape_86 = paddle._C_ops.reshape(transpose_59, stack_80) + del stack_80 + + # pd_op.matmul: (-1x49x384xf32) <- (-1x49x384xf32, 384x384xf32) + matmul_49 = paddle._C_ops.matmul(reshape_86, parameter_38, False, False) + del parameter_38 + + # pd_op.add: (-1x49x384xf32) <- (-1x49x384xf32, 384xf32) + add_66 = paddle._C_ops.add(matmul_49, parameter_37) + del parameter_37 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x49x384xf32, 4xi64) + reshape_87 = paddle._C_ops.reshape(add_66, full_int_array_38) + del full_int_array_38 + + # pd_op.reshape: (-1x2x2x7x7x384xf32) <- (-1x7x7x384xf32, 6xi64) + reshape_170 = paddle._C_ops.reshape(reshape_87, full_int_array_40) + del full_int_array_40 + + # pd_op.transpose: (-1x2x7x2x7x384xf32) <- (-1x2x2x7x7x384xf32) + transpose_60 = paddle._C_ops.transpose(reshape_170, [0, 1, 3, 2, 4, 5]) + del reshape_170 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x2x7x2x7x384xf32, 4xi64) + reshape_88 = paddle._C_ops.reshape(transpose_60, full_int_array_41) + del full_int_array_41 + + # pd_op.roll: (-1x14x14x384xf32) <- (-1x14x14x384xf32, 2xi64) + roll_9 = paddle._C_ops.roll(reshape_88, full_int_array_3, [1, 2]) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_83 = [slice_85, full_46, full_31] + del full_46, slice_85 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_81 = paddle._C_ops.stack(combine_83, 0) + del combine_83 + + # pd_op.reshape: (-1x196x384xf32) <- (-1x14x14x384xf32, 3xi64) + reshape_89 = paddle._C_ops.reshape(roll_9, stack_81) + del stack_81 + + # pd_op.full: (xf32) <- () + full_10 = paddle._C_ops.full( + [], + float("0.836364"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_115 = full_10 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_59 = paddle._C_ops.shape64(reshape_89) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_91 = paddle._C_ops.slice( + shape64_59, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_59 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_84 = [slice_91, full_27, full_27] + del slice_91 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_82 = paddle._C_ops.stack(combine_84, 0) + del combine_84 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_16 = paddle._C_ops.uniform( + stack_82, + paddle.float32, + full_28, + full_29, + 0, + paddle.framework._current_expected_place(), + ) + del stack_82 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_107 = paddle._C_ops.add(full_10, uniform_16) + del uniform_16 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_16 = paddle._C_ops.floor(add_107) + del add_107 + + # pd_op.divide: (-1x196x384xf32) <- (-1x196x384xf32, xf32) + divide_16 = paddle._C_ops.divide(reshape_89, full_10) + + # pd_op.multiply: (-1x196x384xf32) <- (-1x196x384xf32, -1x1x1xf32) + multiply_16 = paddle._C_ops.multiply(divide_16, floor_16) + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_67 = paddle._C_ops.add(add_62, multiply_16) + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_66, layer_norm_67, layer_norm_68 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_67, parameter_36, parameter_35, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_35, parameter_36 + + # pd_op.matmul: (-1x196x1536xf32) <- (-1x196x384xf32, 384x1536xf32) + matmul_50 = paddle._C_ops.matmul(layer_norm_66, parameter_34, False, False) + del parameter_34 + + # pd_op.add: (-1x196x1536xf32) <- (-1x196x1536xf32, 1536xf32) + add_68 = paddle._C_ops.add(matmul_50, parameter_33) + del parameter_33 + + # pd_op.gelu: (-1x196x1536xf32) <- (-1x196x1536xf32) + gelu_9 = paddle._C_ops.gelu(add_68, False) + + # pd_op.matmul: (-1x196x384xf32) <- (-1x196x1536xf32, 1536x384xf32) + matmul_51 = paddle._C_ops.matmul(gelu_9, parameter_32, False, False) + del parameter_32 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, 384xf32) + add_69 = paddle._C_ops.add(matmul_51, parameter_31) + del parameter_31 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_60 = paddle._C_ops.shape64(add_69) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_92 = paddle._C_ops.slice( + shape64_60, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_60 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_85 = [slice_92, full_27, full_27] + del slice_92 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_83 = paddle._C_ops.stack(combine_85, 0) + del combine_85 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_17 = paddle._C_ops.uniform( + stack_83, + paddle.float32, + full_28, + full_29, + 0, + paddle.framework._current_expected_place(), + ) + del stack_83 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_108 = paddle._C_ops.add(full_10, uniform_17) + del uniform_17 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_17 = paddle._C_ops.floor(add_108) + del add_108 + + # pd_op.divide: (-1x196x384xf32) <- (-1x196x384xf32, xf32) + divide_17 = paddle._C_ops.divide(add_69, full_10) + + # pd_op.multiply: (-1x196x384xf32) <- (-1x196x384xf32, -1x1x1xf32) + multiply_17 = paddle._C_ops.multiply(divide_17, floor_17) + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_70 = paddle._C_ops.add(add_67, multiply_17) + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_61 = paddle._C_ops.shape64(add_70) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_93 = paddle._C_ops.slice( + shape64_61, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_61 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_86 = [slice_93, full_43, full_43, full_31] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_84 = paddle._C_ops.stack(combine_86, 0) + del combine_86 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x196x384xf32, 4xi64) + reshape_90 = paddle._C_ops.reshape(add_70, stack_84) + del stack_84 + + # pd_op.strided_slice: (-1x7x7x384xf32) <- (-1x14x14x384xf32, 2xi64, 2xi64, 2xi64) + strided_slice_8 = paddle._C_ops.strided_slice( + reshape_90, [1, 2], full_int_array_16, full_int_array_29, full_int_array_4 + ) + + # pd_op.strided_slice: (-1x7x7x384xf32) <- (-1x14x14x384xf32, 2xi64, 2xi64, 2xi64) + strided_slice_9 = paddle._C_ops.strided_slice( + reshape_90, [1, 2], full_int_array_5, full_int_array_29, full_int_array_4 + ) + + # pd_op.strided_slice: (-1x7x7x384xf32) <- (-1x14x14x384xf32, 2xi64, 2xi64, 2xi64) + strided_slice_10 = paddle._C_ops.strided_slice( + reshape_90, [1, 2], full_int_array_6, full_int_array_29, full_int_array_4 + ) + + # pd_op.strided_slice: (-1x7x7x384xf32) <- (-1x14x14x384xf32, 2xi64, 2xi64, 2xi64) + strided_slice_11 = paddle._C_ops.strided_slice( + reshape_90, [1, 2], full_int_array_18, full_int_array_29, full_int_array_4 + ) + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_62 = paddle._C_ops.shape64(reshape_90) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_94 = paddle._C_ops.slice( + shape64_62, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_62 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_87 = [slice_94, full_43, full_43, full_31] + del full_31, full_43, slice_94 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_85 = paddle._C_ops.stack(combine_87, 0) + del combine_87 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x14x14x384xf32, 4xi64) + reshape_171 = paddle._C_ops.reshape(reshape_90, stack_85) + del stack_85 + + # builtin.combine: ([-1x7x7x384xf32, -1x7x7x384xf32, -1x7x7x384xf32, -1x7x7x384xf32]) <- (-1x7x7x384xf32, -1x7x7x384xf32, -1x7x7x384xf32, -1x7x7x384xf32) + combine_88 = [ + strided_slice_8, + strided_slice_9, + strided_slice_10, + strided_slice_11, + ] + + # pd_op.concat: (-1x7x7x1536xf32) <- ([-1x7x7x384xf32, -1x7x7x384xf32, -1x7x7x384xf32, -1x7x7x384xf32], 1xi32) + concat_2 = paddle._C_ops.concat(combine_88, full_2) + del combine_88 + + # pd_op.full: (xi64) <- () + full_53 = paddle._C_ops.full( + [], float("1536"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_89 = [slice_93, full_30, full_53] + del full_30, full_53, slice_93 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_86 = paddle._C_ops.stack(combine_89, 0) + del combine_89 + + # pd_op.reshape: (-1x-1x1536xf32) <- (-1x7x7x1536xf32, 3xi64) + reshape_91 = paddle._C_ops.reshape(concat_2, stack_86) + del stack_86 + + # pd_op.layer_norm: (-1x-1x1536xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x1536xf32, 1536xf32, 1536xf32) + layer_norm_69, layer_norm_70, layer_norm_71 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + reshape_91, parameter_30, parameter_29, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_29, parameter_30 + + # pd_op.matmul: (-1x-1x768xf32) <- (-1x-1x1536xf32, 1536x768xf32) + matmul_52 = paddle._C_ops.matmul(layer_norm_69, parameter_28, False, False) + del parameter_28 + + # pd_op.shape64: (3xi64) <- (-1x-1x768xf32) + shape64_63 = paddle._C_ops.shape64(matmul_52) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_95 = paddle._C_ops.slice( + shape64_63, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_63 + + # pd_op.shape64: (3xi64) <- (-1x-1x768xf32) + shape64_64 = paddle._C_ops.shape64(matmul_52) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_96 = paddle._C_ops.slice( + shape64_64, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + del shape64_64 + + # pd_op.layer_norm: (-1x-1x768xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x768xf32, 768xf32, 768xf32) + layer_norm_72, layer_norm_73, layer_norm_74 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + matmul_52, parameter_27, parameter_26, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_26, parameter_27 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_90 = [slice_95, full_16, full_16, full_42] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_87 = paddle._C_ops.stack(combine_90, 0) + del combine_90 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x-1x768xf32, 4xi64) + reshape_92 = paddle._C_ops.reshape(layer_norm_72, stack_87) + del stack_87 + + # pd_op.shape64: (4xi64) <- (-1x7x7x768xf32) + shape64_65 = paddle._C_ops.shape64(reshape_92) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_97 = paddle._C_ops.slice( + shape64_65, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_65 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_91 = [slice_97, full_27, full_16, full_27, full_16, full_42] + del slice_97 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_88 = paddle._C_ops.stack(combine_91, 0) + del combine_91 + + # pd_op.reshape: (-1x1x7x1x7x768xf32) <- (-1x7x7x768xf32, 6xi64) + reshape_172 = paddle._C_ops.reshape(reshape_92, stack_88) + del stack_88 + + # pd_op.transpose: (-1x1x1x7x7x768xf32) <- (-1x1x7x1x7x768xf32) + transpose_61 = paddle._C_ops.transpose(reshape_172, [0, 1, 3, 2, 4, 5]) + del reshape_172 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_43 = [-1, 7, 7, 768] + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x1x1x7x7x768xf32, 4xi64) + reshape_93 = paddle._C_ops.reshape(transpose_61, full_int_array_43) + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_44 = [-1, 49, 768] + + # pd_op.reshape: (-1x49x768xf32) <- (-1x7x7x768xf32, 3xi64) + reshape_94 = paddle._C_ops.reshape(reshape_93, full_int_array_44) + + # pd_op.shape64: (3xi64) <- (-1x49x768xf32) + shape64_66 = paddle._C_ops.shape64(reshape_94) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_98 = paddle._C_ops.slice( + shape64_66, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_66 + + # pd_op.matmul: (-1x49x2304xf32) <- (-1x49x768xf32, 768x2304xf32) + matmul_53 = paddle._C_ops.matmul(reshape_94, parameter_25, False, False) + del parameter_25 + + # pd_op.add: (-1x49x2304xf32) <- (-1x49x2304xf32, 2304xf32) + add_71 = paddle._C_ops.add(matmul_53, parameter_24) + del parameter_24 + + # pd_op.full: (xi64) <- () + full_54 = paddle._C_ops.full( + [], float("24"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_92 = [slice_98, full_17, full_18, full_54, full_19] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_89 = paddle._C_ops.stack(combine_92, 0) + del combine_92 + + # pd_op.reshape: (-1x49x3x24x32xf32) <- (-1x49x2304xf32, 5xi64) + reshape_173 = paddle._C_ops.reshape(add_71, stack_89) + del stack_89 + + # pd_op.transpose: (3x-1x24x49x32xf32) <- (-1x49x3x24x32xf32) + transpose_62 = paddle._C_ops.transpose(reshape_173, [2, 0, 3, 1, 4]) + del reshape_173 + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_99 = paddle._C_ops.slice( + transpose_62, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_100 = paddle._C_ops.slice( + transpose_62, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_10 = paddle._C_ops.slice( + transpose_62, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x24x49x32xf32) <- (-1x24x49x32xf32, 1xf32) + scale_10 = paddle._C_ops.scale(slice_99, full_0, float("0"), True) + del slice_99 + + # pd_op.transpose: (-1x24x32x49xf32) <- (-1x24x49x32xf32) + transpose_63 = paddle._C_ops.transpose(slice_100, [0, 1, 3, 2]) + del slice_100 + + # pd_op.matmul: (-1x24x49x49xf32) <- (-1x24x49x32xf32, -1x24x32x49xf32) + matmul_54 = paddle._C_ops.matmul(scale_10, transpose_63, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_95 = paddle._C_ops.reshape(data_16, full_int_array_12) + del data_16 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_10 = paddle._C_ops.index_select(data_17, reshape_95, 0) + del data_17 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_174 = paddle._C_ops.reshape(index_select_10, full_int_array_13) + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_64 = paddle._C_ops.transpose(reshape_174, [2, 0, 1]) + del reshape_174 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_15 = paddle._C_ops.unsqueeze(transpose_64, full_int_array_7) + + # pd_op.add: (-1x24x49x49xf32) <- (-1x24x49x49xf32, 1x24x49x49xf32) + add_109 = paddle._C_ops.add(matmul_54, unsqueeze_15) + + # pd_op.softmax: (-1x24x49x49xf32) <- (-1x24x49x49xf32) + softmax_10 = paddle._C_ops.softmax(add_109, -1) + del add_109 + + # pd_op.matmul: (-1x24x49x32xf32) <- (-1x24x49x49xf32, -1x24x49x32xf32) + matmul_74 = paddle._C_ops.matmul(softmax_10, slice_10, False, False) + + # pd_op.transpose: (-1x49x24x32xf32) <- (-1x24x49x32xf32) + transpose_65 = paddle._C_ops.transpose(matmul_74, [0, 2, 1, 3]) + del matmul_74 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_93 = [slice_98, full_17, full_42] + del slice_98 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_90 = paddle._C_ops.stack(combine_93, 0) + del combine_93 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x49x24x32xf32, 3xi64) + reshape_96 = paddle._C_ops.reshape(transpose_65, stack_90) + del stack_90 + + # pd_op.matmul: (-1x49x768xf32) <- (-1x49x768xf32, 768x768xf32) + matmul_55 = paddle._C_ops.matmul(reshape_96, parameter_23, False, False) + del parameter_23 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, 768xf32) + add_72 = paddle._C_ops.add(matmul_55, parameter_22) + del parameter_22 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x49x768xf32, 4xi64) + reshape_97 = paddle._C_ops.reshape(add_72, full_int_array_43) + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_45 = [-1, 1, 1, 7, 7, 768] + + # pd_op.reshape: (-1x1x1x7x7x768xf32) <- (-1x7x7x768xf32, 6xi64) + reshape_175 = paddle._C_ops.reshape(reshape_97, full_int_array_45) + + # pd_op.transpose: (-1x1x7x1x7x768xf32) <- (-1x1x1x7x7x768xf32) + transpose_66 = paddle._C_ops.transpose(reshape_175, [0, 1, 3, 2, 4, 5]) + del reshape_175 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x1x7x1x7x768xf32, 4xi64) + reshape_98 = paddle._C_ops.reshape(transpose_66, full_int_array_43) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_94 = [slice_95, full_17, full_42] + del slice_95 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_91 = paddle._C_ops.stack(combine_94, 0) + del combine_94 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x7x7x768xf32, 3xi64) + reshape_99 = paddle._C_ops.reshape(reshape_98, stack_91) + del stack_91 + + # pd_op.full: (xf32) <- () + full_11 = paddle._C_ops.full( + [], + float("0.818182"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_137 = full_11 + + # pd_op.shape64: (3xi64) <- (-1x49x768xf32) + shape64_67 = paddle._C_ops.shape64(reshape_99) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_101 = paddle._C_ops.slice( + shape64_67, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_67 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_95 = [slice_101, full_27, full_27] + del slice_101 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_92 = paddle._C_ops.stack(combine_95, 0) + del combine_95 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_18 = paddle._C_ops.uniform( + stack_92, + paddle.float32, + full_28, + full_29, + 0, + paddle.framework._current_expected_place(), + ) + del stack_92 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_110 = paddle._C_ops.add(full_11, uniform_18) + del uniform_18 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_18 = paddle._C_ops.floor(add_110) + del add_110 + + # pd_op.divide: (-1x49x768xf32) <- (-1x49x768xf32, xf32) + divide_18 = paddle._C_ops.divide(reshape_99, full_11) + + # pd_op.multiply: (-1x49x768xf32) <- (-1x49x768xf32, -1x1x1xf32) + multiply_18 = paddle._C_ops.multiply(divide_18, floor_18) + + # pd_op.add: (-1x49x768xf32) <- (-1x-1x768xf32, -1x49x768xf32) + add_73 = paddle._C_ops.add(matmul_52, multiply_18) + + # pd_op.layer_norm: (-1x49x768xf32, -1x49xf32, -1x49xf32) <- (-1x49x768xf32, 768xf32, 768xf32) + layer_norm_75, layer_norm_76, layer_norm_77 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_73, parameter_21, parameter_20, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_20, parameter_21 + + # pd_op.matmul: (-1x49x3072xf32) <- (-1x49x768xf32, 768x3072xf32) + matmul_56 = paddle._C_ops.matmul(layer_norm_75, parameter_19, False, False) + del parameter_19 + + # pd_op.add: (-1x49x3072xf32) <- (-1x49x3072xf32, 3072xf32) + add_74 = paddle._C_ops.add(matmul_56, parameter_18) + del parameter_18 + + # pd_op.gelu: (-1x49x3072xf32) <- (-1x49x3072xf32) + gelu_10 = paddle._C_ops.gelu(add_74, False) + + # pd_op.matmul: (-1x49x768xf32) <- (-1x49x3072xf32, 3072x768xf32) + matmul_57 = paddle._C_ops.matmul(gelu_10, parameter_17, False, False) + del parameter_17 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, 768xf32) + add_75 = paddle._C_ops.add(matmul_57, parameter_16) + del parameter_16 + + # pd_op.shape64: (3xi64) <- (-1x49x768xf32) + shape64_68 = paddle._C_ops.shape64(add_75) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_102 = paddle._C_ops.slice( + shape64_68, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_68 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_96 = [slice_102, full_27, full_27] + del slice_102 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_93 = paddle._C_ops.stack(combine_96, 0) + del combine_96 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_19 = paddle._C_ops.uniform( + stack_93, + paddle.float32, + full_28, + full_29, + 0, + paddle.framework._current_expected_place(), + ) + del stack_93 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_111 = paddle._C_ops.add(full_11, uniform_19) + del uniform_19 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_19 = paddle._C_ops.floor(add_111) + del add_111 + + # pd_op.divide: (-1x49x768xf32) <- (-1x49x768xf32, xf32) + divide_19 = paddle._C_ops.divide(add_75, full_11) + + # pd_op.multiply: (-1x49x768xf32) <- (-1x49x768xf32, -1x1x1xf32) + multiply_19 = paddle._C_ops.multiply(divide_19, floor_19) + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, -1x49x768xf32) + add_76 = paddle._C_ops.add(add_73, multiply_19) + + # pd_op.shape64: (3xi64) <- (-1x49x768xf32) + shape64_69 = paddle._C_ops.shape64(add_76) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_103 = paddle._C_ops.slice( + shape64_69, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_69 + + # pd_op.layer_norm: (-1x49x768xf32, -1x49xf32, -1x49xf32) <- (-1x49x768xf32, 768xf32, 768xf32) + layer_norm_78, layer_norm_79, layer_norm_80 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_76, parameter_15, parameter_14, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_14, parameter_15 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_97 = [slice_103, full_16, full_16, full_42] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_94 = paddle._C_ops.stack(combine_97, 0) + del combine_97 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x49x768xf32, 4xi64) + reshape_100 = paddle._C_ops.reshape(layer_norm_78, stack_94) + del stack_94 + + # pd_op.shape64: (4xi64) <- (-1x7x7x768xf32) + shape64_70 = paddle._C_ops.shape64(reshape_100) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_104 = paddle._C_ops.slice( + shape64_70, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_70 + + # pd_op.roll: (-1x7x7x768xf32) <- (-1x7x7x768xf32, 2xi64) + roll_10 = paddle._C_ops.roll(reshape_100, full_int_array_2, [1, 2]) + + # pd_op.shape64: (4xi64) <- (-1x7x7x768xf32) + shape64_71 = paddle._C_ops.shape64(roll_10) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_105 = paddle._C_ops.slice( + shape64_71, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_71 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_98 = [slice_105, full_27, full_16, full_27, full_16, full_42] + del full_16, slice_105 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_95 = paddle._C_ops.stack(combine_98, 0) + del combine_98 + + # pd_op.reshape: (-1x1x7x1x7x768xf32) <- (-1x7x7x768xf32, 6xi64) + reshape_176 = paddle._C_ops.reshape(roll_10, stack_95) + del stack_95 + + # pd_op.transpose: (-1x1x1x7x7x768xf32) <- (-1x1x7x1x7x768xf32) + transpose_67 = paddle._C_ops.transpose(reshape_176, [0, 1, 3, 2, 4, 5]) + del reshape_176 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x1x1x7x7x768xf32, 4xi64) + reshape_101 = paddle._C_ops.reshape(transpose_67, full_int_array_43) + + # pd_op.reshape: (-1x49x768xf32) <- (-1x7x7x768xf32, 3xi64) + reshape_102 = paddle._C_ops.reshape(reshape_101, full_int_array_44) + del full_int_array_44 + + # pd_op.full: (1x7x7x1xf32) <- () + full_55 = paddle._C_ops.full( + [1, 7, 7, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__46 = paddle._C_ops.set_value_( + full_55, + full_int_array_16, + full_int_array_17, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_55, full_int_array_16 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__47 = paddle._C_ops.set_value_( + set_value__46, + full_int_array_19, + full_int_array_20, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del full_int_array_19, set_value__46 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__48 = paddle._C_ops.set_value_( + set_value__47, + full_int_array_21, + full_int_array_22, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del full_int_array_21, full_int_array_22, set_value__47 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__49 = paddle._C_ops.set_value_( + set_value__48, + full_int_array_23, + full_int_array_24, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del full_int_array_23, set_value__48 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__50 = paddle._C_ops.set_value_( + set_value__49, + full_int_array_17, + full_int_array_2, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del full_int_array_17, set_value__49 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__51 = paddle._C_ops.set_value_( + set_value__50, + full_int_array_20, + full_int_array_25, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del full_int_array_20, full_int_array_25, set_value__50 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__52 = paddle._C_ops.set_value_( + set_value__51, + full_int_array_26, + full_int_array_27, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del full_int_array_26, full_int_array_27, set_value__51 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__53 = paddle._C_ops.set_value_( + set_value__52, + full_int_array_24, + full_int_array_28, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del full_int_array_24, full_int_array_28, set_value__52 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__5 = paddle._C_ops.set_value_( + set_value__53, + full_int_array_2, + full_int_array_29, + full_int_array_18, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del full_int_array_29, set_value__53 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_46 = [1, 1, 7, 1, 7, 1] + + # pd_op.reshape: (1x1x7x1x7x1xf32) <- (1x7x7x1xf32, 6xi64) + reshape_177 = paddle._C_ops.reshape(set_value__5, full_int_array_46) + del full_int_array_46 + + # pd_op.transpose: (1x1x1x7x7x1xf32) <- (1x1x7x1x7x1xf32) + transpose_79 = paddle._C_ops.transpose(reshape_177, [0, 1, 3, 2, 4, 5]) + del reshape_177 + + # pd_op.reshape: (1x7x7x1xf32) <- (1x1x1x7x7x1xf32, 4xi64) + reshape_178 = paddle._C_ops.reshape(transpose_79, full_int_array_31) + del full_int_array_31, transpose_79 + + # pd_op.reshape: (1x49xf32) <- (1x7x7x1xf32, 2xi64) + reshape_179 = paddle._C_ops.reshape(reshape_178, full_int_array_32) + del full_int_array_32, reshape_178 + + # pd_op.unsqueeze: (1x1x49xf32) <- (1x49xf32, 1xi64) + unsqueeze_34 = paddle._C_ops.unsqueeze(reshape_179, full_int_array_8) + + # pd_op.unsqueeze: (1x49x1xf32) <- (1x49xf32, 1xi64) + unsqueeze_35 = paddle._C_ops.unsqueeze(reshape_179, full_int_array_0) + del reshape_179 + + # pd_op.subtract: (1x49x49xf32) <- (1x1x49xf32, 1x49x1xf32) + subtract_5 = paddle._C_ops.subtract(unsqueeze_34, unsqueeze_35) + del unsqueeze_34, unsqueeze_35 + + # pd_op.not_equal: (1x49x49xb) <- (1x49x49xf32, xf32) + not_equal_5 = paddle._C_ops.not_equal(subtract_5, full_22) + + # pd_op.full: (1x49x49xf32) <- () + full_56 = paddle._C_ops.full( + [1, 49, 49], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (1x49x49xf32) <- (1x49x49xb, 1x49x49xf32, 1x49x49xf32) + where_10 = paddle._C_ops.where(not_equal_5, full_56, subtract_5) + del full_56, not_equal_5, subtract_5 + + # pd_op.equal: (1x49x49xb) <- (1x49x49xf32, xf32) + equal_5 = paddle._C_ops.equal(where_10, full_22) + del full_22 + + # pd_op.full: (1x49x49xf32) <- () + full_57 = paddle._C_ops.full( + [1, 49, 49], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (1x49x49xf32) <- (1x49x49xb, 1x49x49xf32, 1x49x49xf32) + where_11 = paddle._C_ops.where(equal_5, full_57, where_10) + del equal_5, full_57, where_10 + + # pd_op.shape64: (3xi64) <- (-1x49x768xf32) + shape64_72 = paddle._C_ops.shape64(reshape_102) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_106 = paddle._C_ops.slice( + shape64_72, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_72 + + # pd_op.matmul: (-1x49x2304xf32) <- (-1x49x768xf32, 768x2304xf32) + matmul_58 = paddle._C_ops.matmul(reshape_102, parameter_13, False, False) + del parameter_13 + + # pd_op.add: (-1x49x2304xf32) <- (-1x49x2304xf32, 2304xf32) + add_77 = paddle._C_ops.add(matmul_58, parameter_12) + del parameter_12 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_99 = [slice_106, full_17, full_18, full_54, full_19] + del full_18, full_19 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_96 = paddle._C_ops.stack(combine_99, 0) + del combine_99 + + # pd_op.reshape: (-1x49x3x24x32xf32) <- (-1x49x2304xf32, 5xi64) + reshape_180 = paddle._C_ops.reshape(add_77, stack_96) + del stack_96 + + # pd_op.transpose: (3x-1x24x49x32xf32) <- (-1x49x3x24x32xf32) + transpose_68 = paddle._C_ops.transpose(reshape_180, [2, 0, 3, 1, 4]) + del reshape_180 + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_107 = paddle._C_ops.slice( + transpose_68, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_108 = paddle._C_ops.slice( + transpose_68, [0], full_int_array_8, full_int_array_0, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_11 = paddle._C_ops.slice( + transpose_68, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.scale: (-1x24x49x32xf32) <- (-1x24x49x32xf32, 1xf32) + scale_11 = paddle._C_ops.scale(slice_107, full_0, float("0"), True) + del slice_107 + + # pd_op.transpose: (-1x24x32x49xf32) <- (-1x24x49x32xf32) + transpose_69 = paddle._C_ops.transpose(slice_108, [0, 1, 3, 2]) + del slice_108 + + # pd_op.matmul: (-1x24x49x49xf32) <- (-1x24x49x32xf32, -1x24x32x49xf32) + matmul_59 = paddle._C_ops.matmul(scale_11, transpose_69, False, False) + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_103 = paddle._C_ops.reshape(data_18, full_int_array_12) + del data_18, full_int_array_12 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_11 = paddle._C_ops.index_select(data_19, reshape_103, 0) + del data_19 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_181 = paddle._C_ops.reshape(index_select_11, full_int_array_13) + del full_int_array_13 + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_70 = paddle._C_ops.transpose(reshape_181, [2, 0, 1]) + del reshape_181 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_16 = paddle._C_ops.unsqueeze(transpose_70, full_int_array_7) + + # pd_op.add: (-1x24x49x49xf32) <- (-1x24x49x49xf32, 1x24x49x49xf32) + add_78 = paddle._C_ops.add(matmul_59, unsqueeze_16) + + # pd_op.full: (xi64) <- () + full_58 = paddle._C_ops.full( + [], float("1"), paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_5 = paddle._C_ops.floor_divide(slice_106, full_58) + del full_58 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_100 = [floor_divide_5, full_27, full_54, full_17, full_17] + del floor_divide_5 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_97 = paddle._C_ops.stack(combine_100, 0) + del combine_100 + + # pd_op.reshape: (-1x1x24x49x49xf32) <- (-1x24x49x49xf32, 5xi64) + reshape_104 = paddle._C_ops.reshape(add_78, stack_97) + del stack_97 + + # pd_op.unsqueeze: (1x1x49x49xf32) <- (1x49x49xf32, 1xi64) + unsqueeze_36 = paddle._C_ops.unsqueeze(where_11, full_int_array_8) + del where_11 + + # pd_op.unsqueeze: (1x1x1x49x49xf32) <- (1x1x49x49xf32, 1xi64) + unsqueeze_17 = paddle._C_ops.unsqueeze(unsqueeze_36, full_int_array_7) + del unsqueeze_36 + + # pd_op.add: (-1x1x24x49x49xf32) <- (-1x1x24x49x49xf32, 1x1x1x49x49xf32) + add_79 = paddle._C_ops.add(reshape_104, unsqueeze_17) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_101 = [slice_106, full_54, full_17, full_17] + del full_54 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_98 = paddle._C_ops.stack(combine_101, 0) + del combine_101 + + # pd_op.reshape: (-1x24x49x49xf32) <- (-1x1x24x49x49xf32, 4xi64) + reshape_182 = paddle._C_ops.reshape(add_79, stack_98) + del stack_98 + + # pd_op.softmax: (-1x24x49x49xf32) <- (-1x24x49x49xf32) + softmax_11 = paddle._C_ops.softmax(reshape_182, -1) + del reshape_182 + + # pd_op.matmul: (-1x24x49x32xf32) <- (-1x24x49x49xf32, -1x24x49x32xf32) + matmul_75 = paddle._C_ops.matmul(softmax_11, slice_11, False, False) + + # pd_op.transpose: (-1x49x24x32xf32) <- (-1x24x49x32xf32) + transpose_71 = paddle._C_ops.transpose(matmul_75, [0, 2, 1, 3]) + del matmul_75 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_102 = [slice_106, full_17, full_42] + del slice_106 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_99 = paddle._C_ops.stack(combine_102, 0) + del combine_102 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x49x24x32xf32, 3xi64) + reshape_105 = paddle._C_ops.reshape(transpose_71, stack_99) + del stack_99 + + # pd_op.matmul: (-1x49x768xf32) <- (-1x49x768xf32, 768x768xf32) + matmul_60 = paddle._C_ops.matmul(reshape_105, parameter_11, False, False) + del parameter_11 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, 768xf32) + add_80 = paddle._C_ops.add(matmul_60, parameter_10) + del parameter_10 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x49x768xf32, 4xi64) + reshape_106 = paddle._C_ops.reshape(add_80, full_int_array_43) + + # pd_op.reshape: (-1x1x1x7x7x768xf32) <- (-1x7x7x768xf32, 6xi64) + reshape_183 = paddle._C_ops.reshape(reshape_106, full_int_array_45) + del full_int_array_45 + + # pd_op.transpose: (-1x1x7x1x7x768xf32) <- (-1x1x1x7x7x768xf32) + transpose_72 = paddle._C_ops.transpose(reshape_183, [0, 1, 3, 2, 4, 5]) + del reshape_183 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x1x7x1x7x768xf32, 4xi64) + reshape_107 = paddle._C_ops.reshape(transpose_72, full_int_array_43) + del full_int_array_43 + + # pd_op.roll: (-1x7x7x768xf32) <- (-1x7x7x768xf32, 2xi64) + roll_11 = paddle._C_ops.roll(reshape_107, full_int_array_3, [1, 2]) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_103 = [slice_103, full_17, full_42] + del full_17, full_42, slice_103 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_100 = paddle._C_ops.stack(combine_103, 0) + del combine_103 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x7x7x768xf32, 3xi64) + reshape_108 = paddle._C_ops.reshape(roll_11, stack_100) + del stack_100 + + # pd_op.full: (xf32) <- () + full_12 = paddle._C_ops.full( + [], float("0.8"), paddle.float32, paddle.framework._current_expected_place() + ) + + # pd_op.assign: (xf32) <- (xf32) + assign_148 = full_12 + + # pd_op.shape64: (3xi64) <- (-1x49x768xf32) + shape64_73 = paddle._C_ops.shape64(reshape_108) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_109 = paddle._C_ops.slice( + shape64_73, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del shape64_73 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_104 = [slice_109, full_27, full_27] + del slice_109 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_101 = paddle._C_ops.stack(combine_104, 0) + del combine_104 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_20 = paddle._C_ops.uniform( + stack_101, + paddle.float32, + full_28, + full_29, + 0, + paddle.framework._current_expected_place(), + ) + del stack_101 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_112 = paddle._C_ops.add(full_12, uniform_20) + del uniform_20 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_20 = paddle._C_ops.floor(add_112) + del add_112 + + # pd_op.divide: (-1x49x768xf32) <- (-1x49x768xf32, xf32) + divide_20 = paddle._C_ops.divide(reshape_108, full_12) + + # pd_op.multiply: (-1x49x768xf32) <- (-1x49x768xf32, -1x1x1xf32) + multiply_20 = paddle._C_ops.multiply(divide_20, floor_20) + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, -1x49x768xf32) + add_81 = paddle._C_ops.add(add_76, multiply_20) + + # pd_op.layer_norm: (-1x49x768xf32, -1x49xf32, -1x49xf32) <- (-1x49x768xf32, 768xf32, 768xf32) + layer_norm_81, layer_norm_82, layer_norm_83 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_81, parameter_9, parameter_8, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_8, parameter_9 + + # pd_op.matmul: (-1x49x3072xf32) <- (-1x49x768xf32, 768x3072xf32) + matmul_61 = paddle._C_ops.matmul(layer_norm_81, parameter_7, False, False) + del parameter_7 + + # pd_op.add: (-1x49x3072xf32) <- (-1x49x3072xf32, 3072xf32) + add_82 = paddle._C_ops.add(matmul_61, parameter_6) + del parameter_6 + + # pd_op.gelu: (-1x49x3072xf32) <- (-1x49x3072xf32) + gelu_11 = paddle._C_ops.gelu(add_82, False) + + # pd_op.matmul: (-1x49x768xf32) <- (-1x49x3072xf32, 3072x768xf32) + matmul_62 = paddle._C_ops.matmul(gelu_11, parameter_5, False, False) + del parameter_5 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, 768xf32) + add_83 = paddle._C_ops.add(matmul_62, parameter_4) + del parameter_4 + + # pd_op.shape64: (3xi64) <- (-1x49x768xf32) + shape64_74 = paddle._C_ops.shape64(add_83) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_110 = paddle._C_ops.slice( + shape64_74, [0], full_int_array_7, full_int_array_8, [1], [0] + ) + del full_int_array_7, full_int_array_8, shape64_74 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_105 = [slice_110, full_27, full_27] + del full_27, slice_110 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_102 = paddle._C_ops.stack(combine_105, 0) + del combine_105 + + # pd_op.uniform: (-1x1x1xf32) <- (3xi64, 1xf32, 1xf32) + uniform_21 = paddle._C_ops.uniform( + stack_102, + paddle.float32, + full_28, + full_29, + 0, + paddle.framework._current_expected_place(), + ) + del full_28, full_29, stack_102 + + # pd_op.add: (-1x1x1xf32) <- (xf32, -1x1x1xf32) + add_113 = paddle._C_ops.add(full_12, uniform_21) + del uniform_21 + + # pd_op.floor: (-1x1x1xf32) <- (-1x1x1xf32) + floor_21 = paddle._C_ops.floor(add_113) + del add_113 + + # pd_op.divide: (-1x49x768xf32) <- (-1x49x768xf32, xf32) + divide_21 = paddle._C_ops.divide(add_83, full_12) + + # pd_op.multiply: (-1x49x768xf32) <- (-1x49x768xf32, -1x1x1xf32) + multiply_21 = paddle._C_ops.multiply(divide_21, floor_21) + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, -1x49x768xf32) + add_84 = paddle._C_ops.add(add_81, multiply_21) + + # pd_op.layer_norm: (-1x49x768xf32, -1x49xf32, -1x49xf32) <- (-1x49x768xf32, 768xf32, 768xf32) + layer_norm_86, layer_norm_84, layer_norm_85 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_84, parameter_3, parameter_2, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_2, parameter_3 + + # pd_op.transpose: (-1x768x49xf32) <- (-1x49x768xf32) + transpose_73 = paddle._C_ops.transpose(layer_norm_86, [0, 2, 1]) + del layer_norm_86 + + # pd_op.unsqueeze: (-1x768x1x49xf32) <- (-1x768x49xf32, 1xi64) + unsqueeze_18 = paddle._C_ops.unsqueeze(transpose_73, full_int_array_0) + + # pd_op.pool2d: (-1x768x1x1xf32) <- (-1x768x1x49xf32, 2xi64) + pool2d_0 = paddle._C_ops.pool2d( + unsqueeze_18, + full_int_array_18, + [1, 1], + [0, 0], + False, + True, + "NCHW", + "avg", + False, + True, + "EXPLICIT", + ) + del full_int_array_18 + + # pd_op.squeeze: (-1x768x1xf32) <- (-1x768x1x1xf32, 1xi64) + squeeze_0 = paddle._C_ops.squeeze(pool2d_0, full_int_array_0) + + # pd_op.flatten: (-1x768xf32) <- (-1x768x1xf32) + flatten_0 = paddle._C_ops.flatten(squeeze_0, 1, 2) + + # pd_op.matmul: (-1x102xf32) <- (-1x768xf32, 768x102xf32) + matmul_63 = paddle._C_ops.matmul(flatten_0, parameter_1, False, False) + del parameter_1 + + # pd_op.add: (-1x102xf32) <- (-1x102xf32, 102xf32) + add_85 = paddle._C_ops.add(matmul_63, parameter_0) + del ( + assign_0, + assign_1, + assign_10, + assign_100, + assign_101, + assign_103, + assign_105, + assign_106, + assign_107, + assign_108, + assign_109, + assign_110, + assign_111, + assign_113, + assign_114, + assign_116, + assign_117, + assign_118, + assign_119, + assign_12, + assign_120, + assign_121, + assign_122, + assign_123, + assign_124, + assign_125, + assign_126, + assign_127, + assign_129, + assign_130, + assign_131, + assign_132, + assign_133, + assign_134, + assign_136, + assign_138, + assign_139, + assign_14, + assign_140, + assign_141, + assign_142, + assign_143, + assign_144, + assign_146, + assign_147, + assign_149, + assign_15, + assign_150, + assign_151, + assign_16, + assign_17, + assign_18, + assign_19, + assign_2, + assign_20, + assign_21, + assign_22, + assign_23, + assign_24, + assign_25, + assign_26, + assign_27, + assign_28, + assign_3, + assign_30, + assign_32, + assign_33, + assign_34, + assign_35, + assign_36, + assign_37, + assign_38, + assign_4, + assign_40, + assign_41, + assign_43, + assign_44, + assign_45, + assign_46, + assign_47, + assign_48, + assign_49, + assign_5, + assign_50, + assign_51, + assign_52, + assign_53, + assign_54, + assign_56, + assign_57, + assign_58, + assign_59, + assign_6, + assign_60, + assign_61, + assign_63, + assign_65, + assign_66, + assign_67, + assign_68, + assign_69, + assign_7, + assign_70, + assign_71, + assign_73, + assign_74, + assign_76, + assign_77, + assign_78, + assign_79, + assign_8, + assign_80, + assign_81, + assign_83, + assign_85, + assign_86, + assign_87, + assign_88, + assign_89, + assign_9, + assign_90, + assign_91, + assign_93, + assign_94, + assign_96, + assign_97, + assign_98, + assign_99, + full_int_array_0, + full_int_array_1, + full_int_array_2, + full_int_array_3, + full_int_array_4, + full_int_array_5, + full_int_array_6, + parameter_0, + ) + + return ( + conv2d_0, + reshape_0, + add_0, + transpose_0, + layer_norm_0, + layer_norm_1, + layer_norm_2, + layer_norm_3, + layer_norm_4, + layer_norm_5, + reshape_1, + transpose_1, + reshape_2, + reshape_3, + matmul_0, + add_1, + transpose_2, + slice_0, + full_0, + scale_0, + transpose_3, + matmul_1, + reshape_4, + index_select_0, + transpose_4, + unsqueeze_0, + softmax_0, + transpose_5, + reshape_5, + matmul_2, + add_2, + reshape_6, + transpose_6, + reshape_7, + reshape_8, + add_3, + layer_norm_6, + layer_norm_7, + layer_norm_8, + matmul_3, + add_4, + gelu_0, + matmul_4, + add_5, + add_6, + layer_norm_9, + layer_norm_10, + layer_norm_11, + reshape_9, + roll_0, + transpose_7, + reshape_10, + reshape_11, + matmul_5, + add_7, + transpose_8, + slice_1, + assign_11, + scale_1, + transpose_9, + matmul_6, + reshape_12, + index_select_1, + transpose_10, + unsqueeze_1, + add_8, + reshape_13, + unsqueeze_2, + add_9, + softmax_1, + transpose_11, + reshape_14, + matmul_7, + add_10, + reshape_15, + transpose_12, + reshape_16, + roll_1, + reshape_17, + full_1, + floor_0, + divide_0, + multiply_0, + add_11, + layer_norm_12, + layer_norm_13, + layer_norm_14, + matmul_8, + add_12, + gelu_1, + matmul_9, + add_13, + assign_13, + floor_1, + divide_1, + multiply_1, + add_14, + reshape_18, + strided_slice_0, + strided_slice_1, + strided_slice_2, + strided_slice_3, + full_2, + concat_0, + reshape_19, + layer_norm_15, + layer_norm_16, + layer_norm_17, + matmul_10, + layer_norm_18, + layer_norm_19, + layer_norm_20, + reshape_20, + transpose_13, + reshape_21, + reshape_22, + matmul_11, + add_15, + transpose_14, + slice_2, + assign_29, + scale_2, + transpose_15, + matmul_12, + reshape_23, + index_select_2, + transpose_16, + unsqueeze_3, + softmax_2, + transpose_17, + reshape_24, + matmul_13, + add_16, + reshape_25, + transpose_18, + reshape_26, + reshape_27, + full_3, + floor_2, + divide_2, + multiply_2, + add_17, + layer_norm_21, + layer_norm_22, + layer_norm_23, + matmul_14, + add_18, + gelu_2, + matmul_15, + add_19, + assign_31, + floor_3, + divide_3, + multiply_3, + add_20, + layer_norm_24, + layer_norm_25, + layer_norm_26, + reshape_28, + roll_2, + transpose_19, + reshape_29, + reshape_30, + matmul_16, + add_21, + transpose_20, + slice_3, + assign_39, + scale_3, + transpose_21, + matmul_17, + reshape_31, + index_select_3, + transpose_22, + unsqueeze_4, + add_22, + reshape_32, + unsqueeze_5, + add_23, + softmax_3, + transpose_23, + reshape_33, + matmul_18, + add_24, + reshape_34, + transpose_24, + reshape_35, + roll_3, + reshape_36, + full_4, + floor_4, + divide_4, + multiply_4, + add_25, + layer_norm_27, + layer_norm_28, + layer_norm_29, + matmul_19, + add_26, + gelu_3, + matmul_20, + add_27, + assign_42, + floor_5, + divide_5, + multiply_5, + add_28, + reshape_37, + strided_slice_4, + strided_slice_5, + strided_slice_6, + strided_slice_7, + assign_55, + concat_1, + reshape_38, + layer_norm_30, + layer_norm_31, + layer_norm_32, + matmul_21, + layer_norm_33, + layer_norm_34, + layer_norm_35, + reshape_39, + transpose_25, + reshape_40, + reshape_41, + matmul_22, + add_29, + transpose_26, + slice_4, + assign_62, + scale_4, + transpose_27, + matmul_23, + reshape_42, + index_select_4, + transpose_28, + unsqueeze_6, + softmax_4, + transpose_29, + reshape_43, + matmul_24, + add_30, + reshape_44, + transpose_30, + reshape_45, + reshape_46, + full_5, + floor_6, + divide_6, + multiply_6, + add_31, + layer_norm_36, + layer_norm_37, + layer_norm_38, + matmul_25, + add_32, + gelu_4, + matmul_26, + add_33, + assign_64, + floor_7, + divide_7, + multiply_7, + add_34, + layer_norm_39, + layer_norm_40, + layer_norm_41, + reshape_47, + roll_4, + transpose_31, + reshape_48, + reshape_49, + matmul_27, + add_35, + transpose_32, + slice_5, + assign_72, + scale_5, + transpose_33, + matmul_28, + reshape_50, + index_select_5, + transpose_34, + unsqueeze_7, + add_36, + reshape_51, + unsqueeze_8, + add_37, + softmax_5, + transpose_35, + reshape_52, + matmul_29, + add_38, + reshape_53, + transpose_36, + reshape_54, + roll_5, + reshape_55, + full_6, + floor_8, + divide_8, + multiply_8, + add_39, + layer_norm_42, + layer_norm_43, + layer_norm_44, + matmul_30, + add_40, + gelu_5, + matmul_31, + add_41, + assign_75, + floor_9, + divide_9, + multiply_9, + add_42, + layer_norm_45, + layer_norm_46, + layer_norm_47, + reshape_56, + transpose_37, + reshape_57, + reshape_58, + matmul_32, + add_43, + transpose_38, + slice_6, + assign_82, + scale_6, + transpose_39, + matmul_33, + reshape_59, + index_select_6, + transpose_40, + unsqueeze_9, + softmax_6, + transpose_41, + reshape_60, + matmul_34, + add_44, + reshape_61, + transpose_42, + reshape_62, + reshape_63, + full_7, + floor_10, + divide_10, + multiply_10, + add_45, + layer_norm_48, + layer_norm_49, + layer_norm_50, + matmul_35, + add_46, + gelu_6, + matmul_36, + add_47, + assign_84, + floor_11, + divide_11, + multiply_11, + add_48, + layer_norm_51, + layer_norm_52, + layer_norm_53, + reshape_64, + roll_6, + transpose_43, + reshape_65, + reshape_66, + matmul_37, + add_49, + transpose_44, + slice_7, + assign_92, + scale_7, + transpose_45, + matmul_38, + reshape_67, + index_select_7, + transpose_46, + unsqueeze_10, + add_50, + reshape_68, + unsqueeze_11, + add_51, + softmax_7, + transpose_47, + reshape_69, + matmul_39, + add_52, + reshape_70, + transpose_48, + reshape_71, + roll_7, + reshape_72, + full_8, + floor_12, + divide_12, + multiply_12, + add_53, + layer_norm_54, + layer_norm_55, + layer_norm_56, + matmul_40, + add_54, + gelu_7, + matmul_41, + add_55, + assign_95, + floor_13, + divide_13, + multiply_13, + add_56, + layer_norm_57, + layer_norm_58, + layer_norm_59, + reshape_73, + transpose_49, + reshape_74, + reshape_75, + matmul_42, + add_57, + transpose_50, + slice_8, + assign_102, + scale_8, + transpose_51, + matmul_43, + reshape_76, + index_select_8, + transpose_52, + unsqueeze_12, + softmax_8, + transpose_53, + reshape_77, + matmul_44, + add_58, + reshape_78, + transpose_54, + reshape_79, + reshape_80, + full_9, + floor_14, + divide_14, + multiply_14, + add_59, + layer_norm_60, + layer_norm_61, + layer_norm_62, + matmul_45, + add_60, + gelu_8, + matmul_46, + add_61, + assign_104, + floor_15, + divide_15, + multiply_15, + add_62, + layer_norm_63, + layer_norm_64, + layer_norm_65, + reshape_81, + roll_8, + transpose_55, + reshape_82, + reshape_83, + matmul_47, + add_63, + transpose_56, + slice_9, + assign_112, + scale_9, + transpose_57, + matmul_48, + reshape_84, + index_select_9, + transpose_58, + unsqueeze_13, + add_64, + reshape_85, + unsqueeze_14, + add_65, + softmax_9, + transpose_59, + reshape_86, + matmul_49, + add_66, + reshape_87, + transpose_60, + reshape_88, + roll_9, + reshape_89, + full_10, + floor_16, + divide_16, + multiply_16, + add_67, + layer_norm_66, + layer_norm_67, + layer_norm_68, + matmul_50, + add_68, + gelu_9, + matmul_51, + add_69, + assign_115, + floor_17, + divide_17, + multiply_17, + add_70, + reshape_90, + strided_slice_8, + strided_slice_9, + strided_slice_10, + strided_slice_11, + assign_128, + concat_2, + reshape_91, + layer_norm_69, + layer_norm_70, + layer_norm_71, + matmul_52, + layer_norm_72, + layer_norm_73, + layer_norm_74, + reshape_92, + transpose_61, + reshape_93, + reshape_94, + matmul_53, + add_71, + transpose_62, + slice_10, + assign_135, + scale_10, + transpose_63, + matmul_54, + reshape_95, + index_select_10, + transpose_64, + unsqueeze_15, + softmax_10, + transpose_65, + reshape_96, + matmul_55, + add_72, + reshape_97, + transpose_66, + reshape_98, + reshape_99, + full_11, + floor_18, + divide_18, + multiply_18, + add_73, + layer_norm_75, + layer_norm_76, + layer_norm_77, + matmul_56, + add_74, + gelu_10, + matmul_57, + add_75, + assign_137, + floor_19, + divide_19, + multiply_19, + add_76, + layer_norm_78, + layer_norm_79, + layer_norm_80, + reshape_100, + roll_10, + transpose_67, + reshape_101, + reshape_102, + matmul_58, + add_77, + transpose_68, + slice_11, + assign_145, + scale_11, + transpose_69, + matmul_59, + reshape_103, + index_select_11, + transpose_70, + unsqueeze_16, + add_78, + reshape_104, + unsqueeze_17, + add_79, + softmax_11, + transpose_71, + reshape_105, + matmul_60, + add_80, + reshape_106, + transpose_72, + reshape_107, + roll_11, + reshape_108, + full_12, + floor_20, + divide_20, + multiply_20, + add_81, + layer_norm_81, + layer_norm_82, + layer_norm_83, + matmul_61, + add_82, + gelu_11, + matmul_62, + add_83, + assign_148, + floor_21, + divide_21, + multiply_21, + add_84, + layer_norm_84, + layer_norm_85, + transpose_73, + unsqueeze_18, + pool2d_0, + squeeze_0, + flatten_0, + matmul_63, + add_85, + set_value__0, + set_value__1, + set_value__2, + set_value__3, + set_value__4, + set_value__5, + ) diff --git a/paddle_samples/vision-model/SwinTransformer_tiny_patch4_window7_224/subgraph_1/weight_meta.py b/paddle_samples/vision-model/SwinTransformer_tiny_patch4_window7_224/subgraph_1/weight_meta.py new file mode 100644 index 00000000..fb09e5cb --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_tiny_patch4_window7_224/subgraph_1/weight_meta.py @@ -0,0 +1,1447 @@ +class Program_weight_tensor_parameter_0: + name = "parameter_0" + shape = [102] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_1: + name = "parameter_1" + shape = [768, 102] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_2: + name = "parameter_2" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_3: + name = "parameter_3" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_4: + name = "parameter_4" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_5: + name = "parameter_5" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_6: + name = "parameter_6" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_7: + name = "parameter_7" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_8: + name = "parameter_8" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_9: + name = "parameter_9" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_10: + name = "parameter_10" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_11: + name = "parameter_11" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_12: + name = "parameter_12" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_13: + name = "parameter_13" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_14: + name = "parameter_14" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_15: + name = "parameter_15" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_16: + name = "parameter_16" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_17: + name = "parameter_17" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_18: + name = "parameter_18" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_19: + name = "parameter_19" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_20: + name = "parameter_20" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_21: + name = "parameter_21" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_22: + name = "parameter_22" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_23: + name = "parameter_23" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_24: + name = "parameter_24" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_25: + name = "parameter_25" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_26: + name = "parameter_26" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_27: + name = "parameter_27" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_28: + name = "parameter_28" + shape = [1536, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_29: + name = "parameter_29" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_30: + name = "parameter_30" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_31: + name = "parameter_31" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_32: + name = "parameter_32" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_33: + name = "parameter_33" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_34: + name = "parameter_34" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_35: + name = "parameter_35" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_36: + name = "parameter_36" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_37: + name = "parameter_37" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_38: + name = "parameter_38" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_39: + name = "parameter_39" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_40: + name = "parameter_40" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_41: + name = "parameter_41" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_42: + name = "parameter_42" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_43: + name = "parameter_43" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_44: + name = "parameter_44" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_45: + name = "parameter_45" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_46: + name = "parameter_46" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_47: + name = "parameter_47" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_48: + name = "parameter_48" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_49: + name = "parameter_49" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_50: + name = "parameter_50" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_51: + name = "parameter_51" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_52: + name = "parameter_52" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_53: + name = "parameter_53" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_54: + name = "parameter_54" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_55: + name = "parameter_55" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_56: + name = "parameter_56" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_57: + name = "parameter_57" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_58: + name = "parameter_58" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_59: + name = "parameter_59" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_60: + name = "parameter_60" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_61: + name = "parameter_61" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_62: + name = "parameter_62" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_63: + name = "parameter_63" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_64: + name = "parameter_64" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_65: + name = "parameter_65" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_66: + name = "parameter_66" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_67: + name = "parameter_67" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_68: + name = "parameter_68" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_69: + name = "parameter_69" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_70: + name = "parameter_70" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_71: + name = "parameter_71" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_72: + name = "parameter_72" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_73: + name = "parameter_73" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_74: + name = "parameter_74" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_75: + name = "parameter_75" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_76: + name = "parameter_76" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_77: + name = "parameter_77" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_78: + name = "parameter_78" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_79: + name = "parameter_79" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_80: + name = "parameter_80" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_81: + name = "parameter_81" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_82: + name = "parameter_82" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_83: + name = "parameter_83" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_84: + name = "parameter_84" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_85: + name = "parameter_85" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_86: + name = "parameter_86" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_87: + name = "parameter_87" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_88: + name = "parameter_88" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_89: + name = "parameter_89" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_90: + name = "parameter_90" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_91: + name = "parameter_91" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_92: + name = "parameter_92" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_93: + name = "parameter_93" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_94: + name = "parameter_94" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_95: + name = "parameter_95" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_96: + name = "parameter_96" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_97: + name = "parameter_97" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_98: + name = "parameter_98" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_99: + name = "parameter_99" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_100: + name = "parameter_100" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_101: + name = "parameter_101" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_102: + name = "parameter_102" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_103: + name = "parameter_103" + shape = [768, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_104: + name = "parameter_104" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_105: + name = "parameter_105" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_106: + name = "parameter_106" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_107: + name = "parameter_107" + shape = [768, 192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_108: + name = "parameter_108" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_109: + name = "parameter_109" + shape = [192, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_110: + name = "parameter_110" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_111: + name = "parameter_111" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_112: + name = "parameter_112" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_113: + name = "parameter_113" + shape = [192, 192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_114: + name = "parameter_114" + shape = [576] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_115: + name = "parameter_115" + shape = [192, 576] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_116: + name = "parameter_116" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_117: + name = "parameter_117" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_118: + name = "parameter_118" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_119: + name = "parameter_119" + shape = [768, 192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_120: + name = "parameter_120" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_121: + name = "parameter_121" + shape = [192, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_122: + name = "parameter_122" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_123: + name = "parameter_123" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_124: + name = "parameter_124" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_125: + name = "parameter_125" + shape = [192, 192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_126: + name = "parameter_126" + shape = [576] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_127: + name = "parameter_127" + shape = [192, 576] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_128: + name = "parameter_128" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_129: + name = "parameter_129" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_130: + name = "parameter_130" + shape = [384, 192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_131: + name = "parameter_131" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_132: + name = "parameter_132" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_133: + name = "parameter_133" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_134: + name = "parameter_134" + shape = [384, 96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_135: + name = "parameter_135" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_136: + name = "parameter_136" + shape = [96, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_137: + name = "parameter_137" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_138: + name = "parameter_138" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_139: + name = "parameter_139" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_140: + name = "parameter_140" + shape = [96, 96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_141: + name = "parameter_141" + shape = [288] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_142: + name = "parameter_142" + shape = [96, 288] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_143: + name = "parameter_143" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_144: + name = "parameter_144" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_145: + name = "parameter_145" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_146: + name = "parameter_146" + shape = [384, 96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_147: + name = "parameter_147" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_148: + name = "parameter_148" + shape = [96, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_149: + name = "parameter_149" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_150: + name = "parameter_150" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_151: + name = "parameter_151" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_152: + name = "parameter_152" + shape = [96, 96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_153: + name = "parameter_153" + shape = [288] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_154: + name = "parameter_154" + shape = [96, 288] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_155: + name = "parameter_155" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_156: + name = "parameter_156" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_157: + name = "parameter_157" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_158: + name = "parameter_158" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_159: + name = "parameter_159" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_160: + name = "parameter_160" + shape = [96, 3, 4, 4] + dtype = "float32" + low = 0 + high = 0.5 + data = None diff --git a/paddle_samples/vision-model/SwinTransformer_tiny_patch4_window7_224/subgraph_2/graph_hash.txt b/paddle_samples/vision-model/SwinTransformer_tiny_patch4_window7_224/subgraph_2/graph_hash.txt new file mode 100644 index 00000000..74dd08e2 --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_tiny_patch4_window7_224/subgraph_2/graph_hash.txt @@ -0,0 +1 @@ +3b849b190c48ef4072dbe90b9d91b07b0dd58fc0a7b7f5f59ae0f85cec51971d \ No newline at end of file diff --git a/paddle_samples/vision-model/SwinTransformer_tiny_patch4_window7_224/subgraph_2/graph_net.json b/paddle_samples/vision-model/SwinTransformer_tiny_patch4_window7_224/subgraph_2/graph_net.json new file mode 100644 index 00000000..368ac6fc --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_tiny_patch4_window7_224/subgraph_2/graph_net.json @@ -0,0 +1,5 @@ +{ + "framework": "paddle", + "num_devices_required": 1, + "num_nodes_required": 1 +} \ No newline at end of file diff --git a/paddle_samples/vision-model/SwinTransformer_tiny_patch4_window7_224/subgraph_2/input_meta.py b/paddle_samples/vision-model/SwinTransformer_tiny_patch4_window7_224/subgraph_2/input_meta.py new file mode 100644 index 00000000..639dab89 --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_tiny_patch4_window7_224/subgraph_2/input_meta.py @@ -0,0 +1,223 @@ +class Program_weight_tensor_data_0: + name = "data_0" + shape = [128, 3, 224, 224] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_1: + name = "data_1" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_2: + name = "data_2" + shape = [169, 3] + dtype = "float32" + low = -10.7123 + high = 4.39805 + data = None + + +class Program_weight_tensor_data_3: + name = "data_3" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_4: + name = "data_4" + shape = [169, 3] + dtype = "float32" + low = -8.27883 + high = 3.66943 + data = None + + +class Program_weight_tensor_data_5: + name = "data_5" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_6: + name = "data_6" + shape = [169, 6] + dtype = "float32" + low = -6.35925 + high = 7.23657 + data = None + + +class Program_weight_tensor_data_7: + name = "data_7" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_8: + name = "data_8" + shape = [169, 6] + dtype = "float32" + low = -9.1059 + high = 5.17685 + data = None + + +class Program_weight_tensor_data_9: + name = "data_9" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_10: + name = "data_10" + shape = [169, 12] + dtype = "float32" + low = -10.2283 + high = 4.4662 + data = None + + +class Program_weight_tensor_data_11: + name = "data_11" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_12: + name = "data_12" + shape = [169, 12] + dtype = "float32" + low = -10.5657 + high = 4.75079 + data = None + + +class Program_weight_tensor_data_13: + name = "data_13" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_14: + name = "data_14" + shape = [169, 12] + dtype = "float32" + low = -11.5154 + high = 3.18744 + data = None + + +class Program_weight_tensor_data_15: + name = "data_15" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_16: + name = "data_16" + shape = [169, 12] + dtype = "float32" + low = -7.51021 + high = 3.70003 + data = None + + +class Program_weight_tensor_data_17: + name = "data_17" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_18: + name = "data_18" + shape = [169, 12] + dtype = "float32" + low = -10.5961 + high = 3.74184 + data = None + + +class Program_weight_tensor_data_19: + name = "data_19" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_20: + name = "data_20" + shape = [169, 12] + dtype = "float32" + low = -13.1406 + high = 3.28118 + data = None + + +class Program_weight_tensor_data_21: + name = "data_21" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_22: + name = "data_22" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_data_23: + name = "data_23" + shape = [49, 49] + dtype = "int64" + low = 0 + high = 3 + data = None + + +class Program_weight_tensor_data_24: + name = "data_24" + shape = [169, 24] + dtype = "float32" + low = 0 + high = 0.5 + data = None diff --git a/paddle_samples/vision-model/SwinTransformer_tiny_patch4_window7_224/subgraph_2/model.py b/paddle_samples/vision-model/SwinTransformer_tiny_patch4_window7_224/subgraph_2/model.py new file mode 100644 index 00000000..126dbe35 --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_tiny_patch4_window7_224/subgraph_2/model.py @@ -0,0 +1,5298 @@ +import paddle + + +class GraphModule(paddle.nn.Layer): + def __init__(self): + super().__init__() + + def forward( + self, + parameter_0, + parameter_1, + parameter_2, + parameter_3, + parameter_4, + parameter_5, + parameter_6, + parameter_7, + parameter_8, + parameter_9, + parameter_10, + parameter_11, + parameter_12, + parameter_13, + parameter_14, + parameter_15, + parameter_16, + parameter_17, + parameter_18, + parameter_19, + parameter_20, + parameter_21, + parameter_22, + parameter_23, + parameter_24, + parameter_25, + parameter_26, + parameter_27, + parameter_28, + parameter_29, + parameter_30, + parameter_31, + parameter_32, + parameter_33, + parameter_34, + parameter_35, + parameter_36, + parameter_37, + parameter_38, + parameter_39, + parameter_40, + parameter_41, + parameter_42, + parameter_43, + parameter_44, + parameter_45, + parameter_46, + parameter_47, + parameter_48, + parameter_49, + parameter_50, + parameter_51, + parameter_52, + parameter_53, + parameter_54, + parameter_55, + parameter_56, + parameter_57, + parameter_58, + parameter_59, + parameter_60, + parameter_61, + parameter_62, + parameter_63, + parameter_64, + parameter_65, + parameter_66, + parameter_67, + parameter_68, + parameter_69, + parameter_70, + parameter_71, + parameter_72, + parameter_73, + parameter_74, + parameter_75, + parameter_76, + parameter_77, + parameter_78, + parameter_79, + parameter_80, + parameter_81, + parameter_82, + parameter_83, + parameter_84, + parameter_85, + parameter_86, + parameter_87, + parameter_88, + parameter_89, + parameter_90, + parameter_91, + parameter_92, + parameter_93, + parameter_94, + parameter_95, + parameter_96, + parameter_97, + parameter_98, + parameter_99, + parameter_100, + parameter_101, + parameter_102, + parameter_103, + parameter_104, + parameter_105, + parameter_106, + parameter_107, + parameter_108, + parameter_109, + parameter_110, + parameter_111, + parameter_112, + parameter_113, + parameter_114, + parameter_115, + parameter_116, + parameter_117, + parameter_118, + parameter_119, + parameter_120, + parameter_121, + parameter_122, + parameter_123, + parameter_124, + parameter_125, + parameter_126, + parameter_127, + parameter_128, + parameter_129, + parameter_130, + parameter_131, + parameter_132, + parameter_133, + parameter_134, + parameter_135, + parameter_136, + parameter_137, + parameter_138, + parameter_139, + parameter_140, + parameter_141, + parameter_142, + parameter_143, + parameter_144, + parameter_145, + parameter_146, + parameter_147, + parameter_148, + parameter_149, + parameter_150, + parameter_151, + parameter_152, + parameter_153, + parameter_154, + parameter_155, + parameter_156, + parameter_157, + parameter_158, + parameter_159, + parameter_160, + data_0, + data_1, + data_2, + data_3, + data_4, + data_5, + data_6, + data_7, + data_8, + data_9, + data_10, + data_11, + data_12, + data_13, + data_14, + data_15, + data_16, + data_17, + data_18, + data_19, + data_20, + data_21, + data_22, + data_23, + data_24, + ): + # pd_op.shape64: (4xi64) <- (-1x3x224x224xf32) + shape64_0 = paddle._C_ops.shape64(data_0) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_0 = [0] + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_1 = [1] + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_0 = paddle._C_ops.slice( + shape64_0, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_0 + + # pd_op.conv2d: (-1x96x56x56xf32) <- (-1x3x224x224xf32, 96x3x4x4xf32) + conv2d_0 = paddle._C_ops.conv2d( + data_0, parameter_160, [4, 4], [0, 0], "EXPLICIT", [1, 1], 1, "NCHW" + ) + del data_0, parameter_160 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_2 = [1, -1, 1, 1] + + # pd_op.reshape: (1x96x1x1xf32) <- (96xf32, 4xi64) + reshape_0 = paddle._C_ops.reshape(parameter_159, full_int_array_2) + del full_int_array_2, parameter_159 + + # pd_op.add: (-1x96x56x56xf32) <- (-1x96x56x56xf32, 1x96x1x1xf32) + add_1 = paddle._C_ops.add(conv2d_0, reshape_0) + del conv2d_0, reshape_0 + + # pd_op.shape64: (4xi64) <- (-1x96x56x56xf32) + shape64_1 = paddle._C_ops.shape64(add_1) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_1 = paddle._C_ops.slice( + shape64_1, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_1 + + # pd_op.flatten: (-1x96x3136xf32) <- (-1x96x56x56xf32) + flatten_0 = paddle._C_ops.flatten(add_1, 2, 3) + del add_1 + + # pd_op.transpose: (-1x3136x96xf32) <- (-1x96x3136xf32) + transpose_0 = paddle._C_ops.transpose(flatten_0, [0, 2, 1]) + del flatten_0 + + # pd_op.layer_norm: (-1x3136x96xf32, -1x3136xf32, -1x3136xf32) <- (-1x3136x96xf32, 96xf32, 96xf32) + layer_norm_0, layer_norm_1, layer_norm_2 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + transpose_0, parameter_158, parameter_157, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_157, parameter_158, transpose_0 + + # pd_op.shape64: (3xi64) <- (-1x3136x96xf32) + shape64_2 = paddle._C_ops.shape64(layer_norm_0) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_2 = paddle._C_ops.slice( + shape64_2, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_2 + + # pd_op.layer_norm: (-1x3136x96xf32, -1x3136xf32, -1x3136xf32) <- (-1x3136x96xf32, 96xf32, 96xf32) + layer_norm_3, layer_norm_4, layer_norm_5 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + layer_norm_0, parameter_156, parameter_155, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_155, parameter_156 + + # pd_op.full: (xi64) <- () + full_0 = paddle._C_ops.full( + [], float("56"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_1 = paddle._C_ops.full( + [], float("96"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_0 = [slice_2, full_0, full_0, full_1] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_0 = paddle._C_ops.stack(combine_0, 0) + del combine_0 + + # pd_op.reshape: (-1x56x56x96xf32) <- (-1x3136x96xf32, 4xi64) + reshape_1 = paddle._C_ops.reshape(layer_norm_3, stack_0) + del layer_norm_3, stack_0 + + # pd_op.shape64: (4xi64) <- (-1x56x56x96xf32) + shape64_3 = paddle._C_ops.shape64(reshape_1) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_3 = paddle._C_ops.slice( + shape64_3, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_3 + + # pd_op.full: (xi64) <- () + full_2 = paddle._C_ops.full( + [], float("8"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_3 = paddle._C_ops.full( + [], float("7"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_1 = [slice_3, full_2, full_3, full_2, full_3, full_1] + del slice_3 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_1 = paddle._C_ops.stack(combine_1, 0) + del combine_1 + + # pd_op.reshape: (-1x8x7x8x7x96xf32) <- (-1x56x56x96xf32, 6xi64) + reshape_2 = paddle._C_ops.reshape(reshape_1, stack_1) + del reshape_1, stack_1 + + # pd_op.transpose: (-1x8x8x7x7x96xf32) <- (-1x8x7x8x7x96xf32) + transpose_1 = paddle._C_ops.transpose(reshape_2, [0, 1, 3, 2, 4, 5]) + del reshape_2 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_3 = [-1, 7, 7, 96] + + # pd_op.reshape: (-1x7x7x96xf32) <- (-1x8x8x7x7x96xf32, 4xi64) + reshape_3 = paddle._C_ops.reshape(transpose_1, full_int_array_3) + del transpose_1 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_4 = [-1, 49, 96] + + # pd_op.reshape: (-1x49x96xf32) <- (-1x7x7x96xf32, 3xi64) + reshape_4 = paddle._C_ops.reshape(reshape_3, full_int_array_4) + del reshape_3 + + # pd_op.shape64: (3xi64) <- (-1x49x96xf32) + shape64_4 = paddle._C_ops.shape64(reshape_4) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_4 = paddle._C_ops.slice( + shape64_4, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_4 + + # pd_op.matmul: (-1x49x288xf32) <- (-1x49x96xf32, 96x288xf32) + matmul_0 = paddle._C_ops.matmul(reshape_4, parameter_154, False, False) + del parameter_154, reshape_4 + + # pd_op.add: (-1x49x288xf32) <- (-1x49x288xf32, 288xf32) + add_2 = paddle._C_ops.add(matmul_0, parameter_153) + del matmul_0, parameter_153 + + # pd_op.full: (xi64) <- () + full_4 = paddle._C_ops.full( + [], float("49"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_5 = paddle._C_ops.full( + [], float("3"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_6 = paddle._C_ops.full( + [], float("32"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_2 = [slice_4, full_4, full_5, full_5, full_6] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_2 = paddle._C_ops.stack(combine_2, 0) + del combine_2 + + # pd_op.reshape: (-1x49x3x3x32xf32) <- (-1x49x288xf32, 5xi64) + reshape_5 = paddle._C_ops.reshape(add_2, stack_2) + del add_2, stack_2 + + # pd_op.transpose: (3x-1x3x49x32xf32) <- (-1x49x3x3x32xf32) + transpose_2 = paddle._C_ops.transpose(reshape_5, [2, 0, 3, 1, 4]) + del reshape_5 + + # pd_op.slice: (-1x3x49x32xf32) <- (3x-1x3x49x32xf32, 1xi64, 1xi64) + slice_5 = paddle._C_ops.slice( + transpose_2, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_5 = [2] + + # pd_op.slice: (-1x3x49x32xf32) <- (3x-1x3x49x32xf32, 1xi64, 1xi64) + slice_6 = paddle._C_ops.slice( + transpose_2, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_6 = [3] + + # pd_op.slice: (-1x3x49x32xf32) <- (3x-1x3x49x32xf32, 1xi64, 1xi64) + slice_7 = paddle._C_ops.slice( + transpose_2, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_2 + + # pd_op.full: (1xf32) <- () + full_7 = paddle._C_ops.full( + [1], float("0.176777"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (-1x3x49x32xf32) <- (-1x3x49x32xf32, 1xf32) + scale_0 = paddle._C_ops.scale(slice_5, full_7, float("0"), True) + del slice_5 + + # pd_op.transpose: (-1x3x32x49xf32) <- (-1x3x49x32xf32) + transpose_3 = paddle._C_ops.transpose(slice_6, [0, 1, 3, 2]) + del slice_6 + + # pd_op.matmul: (-1x3x49x49xf32) <- (-1x3x49x32xf32, -1x3x32x49xf32) + matmul_1 = paddle._C_ops.matmul(scale_0, transpose_3, False, False) + del scale_0, transpose_3 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_7 = [-1] + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_6 = paddle._C_ops.reshape(data_1, full_int_array_7) + del data_1 + + # pd_op.index_select: (2401x3xf32) <- (169x3xf32, 2401xi64) + index_select_0 = paddle._C_ops.index_select(data_2, reshape_6, 0) + del data_2, reshape_6 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_8 = [49, 49, -1] + + # pd_op.reshape: (49x49x3xf32) <- (2401x3xf32, 3xi64) + reshape_7 = paddle._C_ops.reshape(index_select_0, full_int_array_8) + del index_select_0 + + # pd_op.transpose: (3x49x49xf32) <- (49x49x3xf32) + transpose_4 = paddle._C_ops.transpose(reshape_7, [2, 0, 1]) + del reshape_7 + + # pd_op.unsqueeze: (1x3x49x49xf32) <- (3x49x49xf32, 1xi64) + unsqueeze_0 = paddle._C_ops.unsqueeze(transpose_4, full_int_array_0) + del transpose_4 + + # pd_op.add: (-1x3x49x49xf32) <- (-1x3x49x49xf32, 1x3x49x49xf32) + add_3 = paddle._C_ops.add(matmul_1, unsqueeze_0) + del matmul_1, unsqueeze_0 + + # pd_op.softmax: (-1x3x49x49xf32) <- (-1x3x49x49xf32) + softmax_0 = paddle._C_ops.softmax(add_3, -1) + del add_3 + + # pd_op.matmul: (-1x3x49x32xf32) <- (-1x3x49x49xf32, -1x3x49x32xf32) + matmul_2 = paddle._C_ops.matmul(softmax_0, slice_7, False, False) + del slice_7, softmax_0 + + # pd_op.transpose: (-1x49x3x32xf32) <- (-1x3x49x32xf32) + transpose_5 = paddle._C_ops.transpose(matmul_2, [0, 2, 1, 3]) + del matmul_2 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_3 = [slice_4, full_4, full_1] + del slice_4 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_3 = paddle._C_ops.stack(combine_3, 0) + del combine_3 + + # pd_op.reshape: (-1x49x96xf32) <- (-1x49x3x32xf32, 3xi64) + reshape_8 = paddle._C_ops.reshape(transpose_5, stack_3) + del stack_3, transpose_5 + + # pd_op.matmul: (-1x49x96xf32) <- (-1x49x96xf32, 96x96xf32) + matmul_3 = paddle._C_ops.matmul(reshape_8, parameter_152, False, False) + del parameter_152, reshape_8 + + # pd_op.add: (-1x49x96xf32) <- (-1x49x96xf32, 96xf32) + add_4 = paddle._C_ops.add(matmul_3, parameter_151) + del matmul_3, parameter_151 + + # pd_op.reshape: (-1x7x7x96xf32) <- (-1x49x96xf32, 4xi64) + reshape_9 = paddle._C_ops.reshape(add_4, full_int_array_3) + del add_4 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_9 = [-1, 8, 8, 7, 7, 96] + + # pd_op.reshape: (-1x8x8x7x7x96xf32) <- (-1x7x7x96xf32, 6xi64) + reshape_10 = paddle._C_ops.reshape(reshape_9, full_int_array_9) + del reshape_9 + + # pd_op.transpose: (-1x8x7x8x7x96xf32) <- (-1x8x8x7x7x96xf32) + transpose_6 = paddle._C_ops.transpose(reshape_10, [0, 1, 3, 2, 4, 5]) + del reshape_10 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_10 = [-1, 56, 56, 96] + + # pd_op.reshape: (-1x56x56x96xf32) <- (-1x8x7x8x7x96xf32, 4xi64) + reshape_11 = paddle._C_ops.reshape(transpose_6, full_int_array_10) + del transpose_6 + + # pd_op.full: (xi64) <- () + full_8 = paddle._C_ops.full( + [], float("3136"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_4 = [slice_2, full_8, full_1] + del slice_2 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_4 = paddle._C_ops.stack(combine_4, 0) + del combine_4 + + # pd_op.reshape: (-1x3136x96xf32) <- (-1x56x56x96xf32, 3xi64) + reshape_12 = paddle._C_ops.reshape(reshape_11, stack_4) + del reshape_11, stack_4 + + # pd_op.add: (-1x3136x96xf32) <- (-1x3136x96xf32, -1x3136x96xf32) + add_5 = paddle._C_ops.add(layer_norm_0, reshape_12) + del layer_norm_0, reshape_12 + + # pd_op.layer_norm: (-1x3136x96xf32, -1x3136xf32, -1x3136xf32) <- (-1x3136x96xf32, 96xf32, 96xf32) + layer_norm_6, layer_norm_7, layer_norm_8 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_5, parameter_150, parameter_149, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_149, parameter_150 + + # pd_op.matmul: (-1x3136x384xf32) <- (-1x3136x96xf32, 96x384xf32) + matmul_4 = paddle._C_ops.matmul(layer_norm_6, parameter_148, False, False) + del layer_norm_6, parameter_148 + + # pd_op.add: (-1x3136x384xf32) <- (-1x3136x384xf32, 384xf32) + add_6 = paddle._C_ops.add(matmul_4, parameter_147) + del matmul_4, parameter_147 + + # pd_op.gelu: (-1x3136x384xf32) <- (-1x3136x384xf32) + gelu_0 = paddle._C_ops.gelu(add_6, False) + del add_6 + + # pd_op.matmul: (-1x3136x96xf32) <- (-1x3136x384xf32, 384x96xf32) + matmul_5 = paddle._C_ops.matmul(gelu_0, parameter_146, False, False) + del gelu_0, parameter_146 + + # pd_op.add: (-1x3136x96xf32) <- (-1x3136x96xf32, 96xf32) + add_7 = paddle._C_ops.add(matmul_5, parameter_145) + del matmul_5, parameter_145 + + # pd_op.add: (-1x3136x96xf32) <- (-1x3136x96xf32, -1x3136x96xf32) + add_8 = paddle._C_ops.add(add_5, add_7) + del add_5, add_7 + + # pd_op.shape64: (3xi64) <- (-1x3136x96xf32) + shape64_5 = paddle._C_ops.shape64(add_8) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_8 = paddle._C_ops.slice( + shape64_5, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_5 + + # pd_op.layer_norm: (-1x3136x96xf32, -1x3136xf32, -1x3136xf32) <- (-1x3136x96xf32, 96xf32, 96xf32) + layer_norm_9, layer_norm_10, layer_norm_11 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_8, parameter_144, parameter_143, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_143, parameter_144 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_5 = [slice_8, full_0, full_0, full_1] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_5 = paddle._C_ops.stack(combine_5, 0) + del combine_5 + + # pd_op.reshape: (-1x56x56x96xf32) <- (-1x3136x96xf32, 4xi64) + reshape_13 = paddle._C_ops.reshape(layer_norm_9, stack_5) + del layer_norm_9, stack_5 + + # pd_op.shape64: (4xi64) <- (-1x56x56x96xf32) + shape64_6 = paddle._C_ops.shape64(reshape_13) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_9 = paddle._C_ops.slice( + shape64_6, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_6 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_11 = [-3, -3] + + # pd_op.roll: (-1x56x56x96xf32) <- (-1x56x56x96xf32, 2xi64) + roll_0 = paddle._C_ops.roll(reshape_13, full_int_array_11, [1, 2]) + del reshape_13 + + # pd_op.shape64: (4xi64) <- (-1x56x56x96xf32) + shape64_7 = paddle._C_ops.shape64(roll_0) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_10 = paddle._C_ops.slice( + shape64_7, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_7 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_6 = [slice_10, full_2, full_3, full_2, full_3, full_1] + del full_2, slice_10 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_6 = paddle._C_ops.stack(combine_6, 0) + del combine_6 + + # pd_op.reshape: (-1x8x7x8x7x96xf32) <- (-1x56x56x96xf32, 6xi64) + reshape_14 = paddle._C_ops.reshape(roll_0, stack_6) + del roll_0, stack_6 + + # pd_op.transpose: (-1x8x8x7x7x96xf32) <- (-1x8x7x8x7x96xf32) + transpose_7 = paddle._C_ops.transpose(reshape_14, [0, 1, 3, 2, 4, 5]) + del reshape_14 + + # pd_op.reshape: (-1x7x7x96xf32) <- (-1x8x8x7x7x96xf32, 4xi64) + reshape_15 = paddle._C_ops.reshape(transpose_7, full_int_array_3) + del transpose_7 + + # pd_op.reshape: (-1x49x96xf32) <- (-1x7x7x96xf32, 3xi64) + reshape_16 = paddle._C_ops.reshape(reshape_15, full_int_array_4) + del full_int_array_4, reshape_15 + + # pd_op.full: (1x56x56x1xf32) <- () + full_9 = paddle._C_ops.full( + [1, 56, 56, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_12 = [0, 0] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_13 = [-7, -7] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_14 = [1, 1] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__6 = paddle._C_ops.set_value_( + full_9, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_9 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_15 = [0, -7] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_16 = [-7, -3] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__7 = paddle._C_ops.set_value_( + set_value__6, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__6 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_17 = [0, -3] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_18 = [-7, 2147483647] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__8 = paddle._C_ops.set_value_( + set_value__7, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__7 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_19 = [-7, 0] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_20 = [-3, -7] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__9 = paddle._C_ops.set_value_( + set_value__8, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__8 + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__10 = paddle._C_ops.set_value_( + set_value__9, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__9 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_21 = [-3, 2147483647] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__11 = paddle._C_ops.set_value_( + set_value__10, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__10 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_22 = [-3, 0] + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_23 = [2147483647, -7] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__12 = paddle._C_ops.set_value_( + set_value__11, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__11 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_24 = [2147483647, -3] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__13 = paddle._C_ops.set_value_( + set_value__12, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__12 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_25 = [2147483647, 2147483647] + + # pd_op.set_value_: (1x56x56x1xf32) <- (1x56x56x1xf32, 2xi64, 2xi64, 2xi64) + set_value__0 = paddle._C_ops.set_value_( + set_value__13, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__13 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_26 = [1, 8, 7, 8, 7, 1] + + # pd_op.reshape: (1x8x7x8x7x1xf32) <- (1x56x56x1xf32, 6xi64) + reshape_17 = paddle._C_ops.reshape(set_value__0, full_int_array_26) + del full_int_array_26 + + # pd_op.transpose: (1x8x8x7x7x1xf32) <- (1x8x7x8x7x1xf32) + transpose_8 = paddle._C_ops.transpose(reshape_17, [0, 1, 3, 2, 4, 5]) + del reshape_17 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_27 = [-1, 7, 7, 1] + + # pd_op.reshape: (64x7x7x1xf32) <- (1x8x8x7x7x1xf32, 4xi64) + reshape_18 = paddle._C_ops.reshape(transpose_8, full_int_array_27) + del transpose_8 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_28 = [-1, 49] + + # pd_op.reshape: (64x49xf32) <- (64x7x7x1xf32, 2xi64) + reshape_19 = paddle._C_ops.reshape(reshape_18, full_int_array_28) + del reshape_18 + + # pd_op.unsqueeze: (64x1x49xf32) <- (64x49xf32, 1xi64) + unsqueeze_1 = paddle._C_ops.unsqueeze(reshape_19, full_int_array_1) + + # pd_op.unsqueeze: (64x49x1xf32) <- (64x49xf32, 1xi64) + unsqueeze_2 = paddle._C_ops.unsqueeze(reshape_19, full_int_array_5) + del reshape_19 + + # pd_op.subtract: (64x49x49xf32) <- (64x1x49xf32, 64x49x1xf32) + subtract_0 = paddle._C_ops.subtract(unsqueeze_1, unsqueeze_2) + del unsqueeze_1, unsqueeze_2 + + # pd_op.full: (xf32) <- () + full_10 = paddle._C_ops.full( + [], float("0"), paddle.float32, paddle.framework._current_expected_place() + ) + + # pd_op.not_equal: (64x49x49xb) <- (64x49x49xf32, xf32) + not_equal_0 = paddle._C_ops.not_equal(subtract_0, full_10) + + # pd_op.full: (64x49x49xf32) <- () + full_11 = paddle._C_ops.full( + [64, 49, 49], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (64x49x49xf32) <- (64x49x49xb, 64x49x49xf32, 64x49x49xf32) + where_0 = paddle._C_ops.where(not_equal_0, full_11, subtract_0) + del full_11, not_equal_0, subtract_0 + + # pd_op.equal: (64x49x49xb) <- (64x49x49xf32, xf32) + equal_0 = paddle._C_ops.equal(where_0, full_10) + + # pd_op.full: (64x49x49xf32) <- () + full_12 = paddle._C_ops.full( + [64, 49, 49], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (64x49x49xf32) <- (64x49x49xb, 64x49x49xf32, 64x49x49xf32) + where_1 = paddle._C_ops.where(equal_0, full_12, where_0) + del equal_0, full_12, where_0 + + # pd_op.shape64: (3xi64) <- (-1x49x96xf32) + shape64_8 = paddle._C_ops.shape64(reshape_16) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_11 = paddle._C_ops.slice( + shape64_8, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_8 + + # pd_op.matmul: (-1x49x288xf32) <- (-1x49x96xf32, 96x288xf32) + matmul_6 = paddle._C_ops.matmul(reshape_16, parameter_142, False, False) + del parameter_142, reshape_16 + + # pd_op.add: (-1x49x288xf32) <- (-1x49x288xf32, 288xf32) + add_9 = paddle._C_ops.add(matmul_6, parameter_141) + del matmul_6, parameter_141 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_7 = [slice_11, full_4, full_5, full_5, full_6] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_7 = paddle._C_ops.stack(combine_7, 0) + del combine_7 + + # pd_op.reshape: (-1x49x3x3x32xf32) <- (-1x49x288xf32, 5xi64) + reshape_20 = paddle._C_ops.reshape(add_9, stack_7) + del add_9, stack_7 + + # pd_op.transpose: (3x-1x3x49x32xf32) <- (-1x49x3x3x32xf32) + transpose_9 = paddle._C_ops.transpose(reshape_20, [2, 0, 3, 1, 4]) + del reshape_20 + + # pd_op.slice: (-1x3x49x32xf32) <- (3x-1x3x49x32xf32, 1xi64, 1xi64) + slice_12 = paddle._C_ops.slice( + transpose_9, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x3x49x32xf32) <- (3x-1x3x49x32xf32, 1xi64, 1xi64) + slice_13 = paddle._C_ops.slice( + transpose_9, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x3x49x32xf32) <- (3x-1x3x49x32xf32, 1xi64, 1xi64) + slice_14 = paddle._C_ops.slice( + transpose_9, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_9 + + # pd_op.scale: (-1x3x49x32xf32) <- (-1x3x49x32xf32, 1xf32) + scale_1 = paddle._C_ops.scale(slice_12, full_7, float("0"), True) + del slice_12 + + # pd_op.transpose: (-1x3x32x49xf32) <- (-1x3x49x32xf32) + transpose_10 = paddle._C_ops.transpose(slice_13, [0, 1, 3, 2]) + del slice_13 + + # pd_op.matmul: (-1x3x49x49xf32) <- (-1x3x49x32xf32, -1x3x32x49xf32) + matmul_7 = paddle._C_ops.matmul(scale_1, transpose_10, False, False) + del scale_1, transpose_10 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_21 = paddle._C_ops.reshape(data_3, full_int_array_7) + del data_3 + + # pd_op.index_select: (2401x3xf32) <- (169x3xf32, 2401xi64) + index_select_1 = paddle._C_ops.index_select(data_4, reshape_21, 0) + del data_4, reshape_21 + + # pd_op.reshape: (49x49x3xf32) <- (2401x3xf32, 3xi64) + reshape_22 = paddle._C_ops.reshape(index_select_1, full_int_array_8) + del index_select_1 + + # pd_op.transpose: (3x49x49xf32) <- (49x49x3xf32) + transpose_11 = paddle._C_ops.transpose(reshape_22, [2, 0, 1]) + del reshape_22 + + # pd_op.unsqueeze: (1x3x49x49xf32) <- (3x49x49xf32, 1xi64) + unsqueeze_3 = paddle._C_ops.unsqueeze(transpose_11, full_int_array_0) + del transpose_11 + + # pd_op.add: (-1x3x49x49xf32) <- (-1x3x49x49xf32, 1x3x49x49xf32) + add_10 = paddle._C_ops.add(matmul_7, unsqueeze_3) + del matmul_7, unsqueeze_3 + + # pd_op.full: (xi64) <- () + full_13 = paddle._C_ops.full( + [], float("64"), paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_0 = paddle._C_ops.floor_divide(slice_11, full_13) + del full_13 + + # pd_op.full: (xi64) <- () + full_14 = paddle._C_ops.full( + [], float("64"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_8 = [floor_divide_0, full_14, full_5, full_4, full_4] + del floor_divide_0, full_14 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_8 = paddle._C_ops.stack(combine_8, 0) + del combine_8 + + # pd_op.reshape: (-1x64x3x49x49xf32) <- (-1x3x49x49xf32, 5xi64) + reshape_23 = paddle._C_ops.reshape(add_10, stack_8) + del add_10, stack_8 + + # pd_op.unsqueeze: (64x1x49x49xf32) <- (64x49x49xf32, 1xi64) + unsqueeze_4 = paddle._C_ops.unsqueeze(where_1, full_int_array_1) + del where_1 + + # pd_op.unsqueeze: (1x64x1x49x49xf32) <- (64x1x49x49xf32, 1xi64) + unsqueeze_5 = paddle._C_ops.unsqueeze(unsqueeze_4, full_int_array_0) + del unsqueeze_4 + + # pd_op.add: (-1x64x3x49x49xf32) <- (-1x64x3x49x49xf32, 1x64x1x49x49xf32) + add_11 = paddle._C_ops.add(reshape_23, unsqueeze_5) + del reshape_23, unsqueeze_5 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_9 = [slice_11, full_5, full_4, full_4] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_9 = paddle._C_ops.stack(combine_9, 0) + del combine_9 + + # pd_op.reshape: (-1x3x49x49xf32) <- (-1x64x3x49x49xf32, 4xi64) + reshape_24 = paddle._C_ops.reshape(add_11, stack_9) + del add_11, stack_9 + + # pd_op.softmax: (-1x3x49x49xf32) <- (-1x3x49x49xf32) + softmax_1 = paddle._C_ops.softmax(reshape_24, -1) + del reshape_24 + + # pd_op.matmul: (-1x3x49x32xf32) <- (-1x3x49x49xf32, -1x3x49x32xf32) + matmul_8 = paddle._C_ops.matmul(softmax_1, slice_14, False, False) + del slice_14, softmax_1 + + # pd_op.transpose: (-1x49x3x32xf32) <- (-1x3x49x32xf32) + transpose_12 = paddle._C_ops.transpose(matmul_8, [0, 2, 1, 3]) + del matmul_8 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_10 = [slice_11, full_4, full_1] + del slice_11 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_10 = paddle._C_ops.stack(combine_10, 0) + del combine_10 + + # pd_op.reshape: (-1x49x96xf32) <- (-1x49x3x32xf32, 3xi64) + reshape_25 = paddle._C_ops.reshape(transpose_12, stack_10) + del stack_10, transpose_12 + + # pd_op.matmul: (-1x49x96xf32) <- (-1x49x96xf32, 96x96xf32) + matmul_9 = paddle._C_ops.matmul(reshape_25, parameter_140, False, False) + del parameter_140, reshape_25 + + # pd_op.add: (-1x49x96xf32) <- (-1x49x96xf32, 96xf32) + add_12 = paddle._C_ops.add(matmul_9, parameter_139) + del matmul_9, parameter_139 + + # pd_op.reshape: (-1x7x7x96xf32) <- (-1x49x96xf32, 4xi64) + reshape_26 = paddle._C_ops.reshape(add_12, full_int_array_3) + del add_12, full_int_array_3 + + # pd_op.reshape: (-1x8x8x7x7x96xf32) <- (-1x7x7x96xf32, 6xi64) + reshape_27 = paddle._C_ops.reshape(reshape_26, full_int_array_9) + del full_int_array_9, reshape_26 + + # pd_op.transpose: (-1x8x7x8x7x96xf32) <- (-1x8x8x7x7x96xf32) + transpose_13 = paddle._C_ops.transpose(reshape_27, [0, 1, 3, 2, 4, 5]) + del reshape_27 + + # pd_op.reshape: (-1x56x56x96xf32) <- (-1x8x7x8x7x96xf32, 4xi64) + reshape_28 = paddle._C_ops.reshape(transpose_13, full_int_array_10) + del full_int_array_10, transpose_13 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_29 = [3, 3] + + # pd_op.roll: (-1x56x56x96xf32) <- (-1x56x56x96xf32, 2xi64) + roll_1 = paddle._C_ops.roll(reshape_28, full_int_array_29, [1, 2]) + del reshape_28 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_11 = [slice_8, full_8, full_1] + del full_8, slice_8 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_11 = paddle._C_ops.stack(combine_11, 0) + del combine_11 + + # pd_op.reshape: (-1x3136x96xf32) <- (-1x56x56x96xf32, 3xi64) + reshape_29 = paddle._C_ops.reshape(roll_1, stack_11) + del roll_1, stack_11 + + # pd_op.add: (-1x3136x96xf32) <- (-1x3136x96xf32, -1x3136x96xf32) + add_13 = paddle._C_ops.add(add_8, reshape_29) + del add_8, reshape_29 + + # pd_op.layer_norm: (-1x3136x96xf32, -1x3136xf32, -1x3136xf32) <- (-1x3136x96xf32, 96xf32, 96xf32) + layer_norm_12, layer_norm_13, layer_norm_14 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_13, parameter_138, parameter_137, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_137, parameter_138 + + # pd_op.matmul: (-1x3136x384xf32) <- (-1x3136x96xf32, 96x384xf32) + matmul_10 = paddle._C_ops.matmul(layer_norm_12, parameter_136, False, False) + del layer_norm_12, parameter_136 + + # pd_op.add: (-1x3136x384xf32) <- (-1x3136x384xf32, 384xf32) + add_14 = paddle._C_ops.add(matmul_10, parameter_135) + del matmul_10, parameter_135 + + # pd_op.gelu: (-1x3136x384xf32) <- (-1x3136x384xf32) + gelu_1 = paddle._C_ops.gelu(add_14, False) + del add_14 + + # pd_op.matmul: (-1x3136x96xf32) <- (-1x3136x384xf32, 384x96xf32) + matmul_11 = paddle._C_ops.matmul(gelu_1, parameter_134, False, False) + del gelu_1, parameter_134 + + # pd_op.add: (-1x3136x96xf32) <- (-1x3136x96xf32, 96xf32) + add_15 = paddle._C_ops.add(matmul_11, parameter_133) + del matmul_11, parameter_133 + + # pd_op.add: (-1x3136x96xf32) <- (-1x3136x96xf32, -1x3136x96xf32) + add_16 = paddle._C_ops.add(add_13, add_15) + del add_13, add_15 + + # pd_op.shape64: (3xi64) <- (-1x3136x96xf32) + shape64_9 = paddle._C_ops.shape64(add_16) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_15 = paddle._C_ops.slice( + shape64_9, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_9 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_12 = [slice_15, full_0, full_0, full_1] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_12 = paddle._C_ops.stack(combine_12, 0) + del combine_12 + + # pd_op.reshape: (-1x56x56x96xf32) <- (-1x3136x96xf32, 4xi64) + reshape_30 = paddle._C_ops.reshape(add_16, stack_12) + del add_16, stack_12 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_30 = [2, 2] + + # pd_op.strided_slice: (-1x28x28x96xf32) <- (-1x56x56x96xf32, 2xi64, 2xi64, 2xi64) + strided_slice_0 = paddle._C_ops.strided_slice( + reshape_30, [1, 2], full_int_array_12, full_int_array_25, full_int_array_30 + ) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_31 = [1, 0] + + # pd_op.strided_slice: (-1x28x28x96xf32) <- (-1x56x56x96xf32, 2xi64, 2xi64, 2xi64) + strided_slice_1 = paddle._C_ops.strided_slice( + reshape_30, [1, 2], full_int_array_31, full_int_array_25, full_int_array_30 + ) + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_32 = [0, 1] + + # pd_op.strided_slice: (-1x28x28x96xf32) <- (-1x56x56x96xf32, 2xi64, 2xi64, 2xi64) + strided_slice_2 = paddle._C_ops.strided_slice( + reshape_30, [1, 2], full_int_array_32, full_int_array_25, full_int_array_30 + ) + + # pd_op.strided_slice: (-1x28x28x96xf32) <- (-1x56x56x96xf32, 2xi64, 2xi64, 2xi64) + strided_slice_3 = paddle._C_ops.strided_slice( + reshape_30, [1, 2], full_int_array_14, full_int_array_25, full_int_array_30 + ) + + # pd_op.shape64: (4xi64) <- (-1x56x56x96xf32) + shape64_10 = paddle._C_ops.shape64(reshape_30) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_16 = paddle._C_ops.slice( + shape64_10, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_10 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_13 = [slice_16, full_0, full_0, full_1] + del full_0, full_1, slice_16 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_13 = paddle._C_ops.stack(combine_13, 0) + del combine_13 + + # pd_op.reshape: (-1x56x56x96xf32) <- (-1x56x56x96xf32, 4xi64) + reshape_31 = paddle._C_ops.reshape(reshape_30, stack_13) + del reshape_30, stack_13 + + # pd_op.full: (1xi32) <- () + full_15 = paddle._C_ops.full( + [1], float("-1"), paddle.int32, paddle.core.CPUPlace() + ) + + # builtin.combine: ([-1x28x28x96xf32, -1x28x28x96xf32, -1x28x28x96xf32, -1x28x28x96xf32]) <- (-1x28x28x96xf32, -1x28x28x96xf32, -1x28x28x96xf32, -1x28x28x96xf32) + combine_14 = [ + strided_slice_0, + strided_slice_1, + strided_slice_2, + strided_slice_3, + ] + del strided_slice_0, strided_slice_1, strided_slice_2, strided_slice_3 + + # pd_op.concat: (-1x28x28x384xf32) <- ([-1x28x28x96xf32, -1x28x28x96xf32, -1x28x28x96xf32, -1x28x28x96xf32], 1xi32) + concat_0 = paddle._C_ops.concat(combine_14, full_15) + del combine_14 + + # pd_op.full: (xi64) <- () + full_16 = paddle._C_ops.full( + [], float("-1"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_17 = paddle._C_ops.full( + [], float("384"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_15 = [slice_15, full_16, full_17] + del slice_15 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_14 = paddle._C_ops.stack(combine_15, 0) + del combine_15 + + # pd_op.reshape: (-1x-1x384xf32) <- (-1x28x28x384xf32, 3xi64) + reshape_32 = paddle._C_ops.reshape(concat_0, stack_14) + del concat_0, stack_14 + + # pd_op.layer_norm: (-1x-1x384xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x384xf32, 384xf32, 384xf32) + layer_norm_15, layer_norm_16, layer_norm_17 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + reshape_32, parameter_132, parameter_131, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_131, parameter_132, reshape_32 + + # pd_op.matmul: (-1x-1x192xf32) <- (-1x-1x384xf32, 384x192xf32) + matmul_12 = paddle._C_ops.matmul(layer_norm_15, parameter_130, False, False) + del layer_norm_15, parameter_130 + + # pd_op.shape64: (3xi64) <- (-1x-1x192xf32) + shape64_11 = paddle._C_ops.shape64(matmul_12) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_17 = paddle._C_ops.slice( + shape64_11, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_11 + + # pd_op.shape64: (3xi64) <- (-1x-1x192xf32) + shape64_12 = paddle._C_ops.shape64(matmul_12) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_18 = paddle._C_ops.slice( + shape64_12, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + del shape64_12 + + # pd_op.layer_norm: (-1x-1x192xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x192xf32, 192xf32, 192xf32) + layer_norm_18, layer_norm_19, layer_norm_20 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + matmul_12, parameter_129, parameter_128, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_128, parameter_129 + + # pd_op.full: (xi64) <- () + full_18 = paddle._C_ops.full( + [], float("28"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (xi64) <- () + full_19 = paddle._C_ops.full( + [], float("192"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_16 = [slice_17, full_18, full_18, full_19] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_15 = paddle._C_ops.stack(combine_16, 0) + del combine_16 + + # pd_op.reshape: (-1x28x28x192xf32) <- (-1x-1x192xf32, 4xi64) + reshape_33 = paddle._C_ops.reshape(layer_norm_18, stack_15) + del layer_norm_18, stack_15 + + # pd_op.shape64: (4xi64) <- (-1x28x28x192xf32) + shape64_13 = paddle._C_ops.shape64(reshape_33) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_19 = paddle._C_ops.slice( + shape64_13, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_13 + + # pd_op.full: (xi64) <- () + full_20 = paddle._C_ops.full( + [], float("4"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_17 = [slice_19, full_20, full_3, full_20, full_3, full_19] + del slice_19 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_16 = paddle._C_ops.stack(combine_17, 0) + del combine_17 + + # pd_op.reshape: (-1x4x7x4x7x192xf32) <- (-1x28x28x192xf32, 6xi64) + reshape_34 = paddle._C_ops.reshape(reshape_33, stack_16) + del reshape_33, stack_16 + + # pd_op.transpose: (-1x4x4x7x7x192xf32) <- (-1x4x7x4x7x192xf32) + transpose_14 = paddle._C_ops.transpose(reshape_34, [0, 1, 3, 2, 4, 5]) + del reshape_34 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_33 = [-1, 7, 7, 192] + + # pd_op.reshape: (-1x7x7x192xf32) <- (-1x4x4x7x7x192xf32, 4xi64) + reshape_35 = paddle._C_ops.reshape(transpose_14, full_int_array_33) + del transpose_14 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_34 = [-1, 49, 192] + + # pd_op.reshape: (-1x49x192xf32) <- (-1x7x7x192xf32, 3xi64) + reshape_36 = paddle._C_ops.reshape(reshape_35, full_int_array_34) + del reshape_35 + + # pd_op.shape64: (3xi64) <- (-1x49x192xf32) + shape64_14 = paddle._C_ops.shape64(reshape_36) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_20 = paddle._C_ops.slice( + shape64_14, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_14 + + # pd_op.matmul: (-1x49x576xf32) <- (-1x49x192xf32, 192x576xf32) + matmul_13 = paddle._C_ops.matmul(reshape_36, parameter_127, False, False) + del parameter_127, reshape_36 + + # pd_op.add: (-1x49x576xf32) <- (-1x49x576xf32, 576xf32) + add_17 = paddle._C_ops.add(matmul_13, parameter_126) + del matmul_13, parameter_126 + + # pd_op.full: (xi64) <- () + full_21 = paddle._C_ops.full( + [], float("6"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_18 = [slice_20, full_4, full_5, full_21, full_6] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_17 = paddle._C_ops.stack(combine_18, 0) + del combine_18 + + # pd_op.reshape: (-1x49x3x6x32xf32) <- (-1x49x576xf32, 5xi64) + reshape_37 = paddle._C_ops.reshape(add_17, stack_17) + del add_17, stack_17 + + # pd_op.transpose: (3x-1x6x49x32xf32) <- (-1x49x3x6x32xf32) + transpose_15 = paddle._C_ops.transpose(reshape_37, [2, 0, 3, 1, 4]) + del reshape_37 + + # pd_op.slice: (-1x6x49x32xf32) <- (3x-1x6x49x32xf32, 1xi64, 1xi64) + slice_21 = paddle._C_ops.slice( + transpose_15, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x6x49x32xf32) <- (3x-1x6x49x32xf32, 1xi64, 1xi64) + slice_22 = paddle._C_ops.slice( + transpose_15, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x6x49x32xf32) <- (3x-1x6x49x32xf32, 1xi64, 1xi64) + slice_23 = paddle._C_ops.slice( + transpose_15, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_15 + + # pd_op.scale: (-1x6x49x32xf32) <- (-1x6x49x32xf32, 1xf32) + scale_2 = paddle._C_ops.scale(slice_21, full_7, float("0"), True) + del slice_21 + + # pd_op.transpose: (-1x6x32x49xf32) <- (-1x6x49x32xf32) + transpose_16 = paddle._C_ops.transpose(slice_22, [0, 1, 3, 2]) + del slice_22 + + # pd_op.matmul: (-1x6x49x49xf32) <- (-1x6x49x32xf32, -1x6x32x49xf32) + matmul_14 = paddle._C_ops.matmul(scale_2, transpose_16, False, False) + del scale_2, transpose_16 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_38 = paddle._C_ops.reshape(data_5, full_int_array_7) + del data_5 + + # pd_op.index_select: (2401x6xf32) <- (169x6xf32, 2401xi64) + index_select_2 = paddle._C_ops.index_select(data_6, reshape_38, 0) + del data_6, reshape_38 + + # pd_op.reshape: (49x49x6xf32) <- (2401x6xf32, 3xi64) + reshape_39 = paddle._C_ops.reshape(index_select_2, full_int_array_8) + del index_select_2 + + # pd_op.transpose: (6x49x49xf32) <- (49x49x6xf32) + transpose_17 = paddle._C_ops.transpose(reshape_39, [2, 0, 1]) + del reshape_39 + + # pd_op.unsqueeze: (1x6x49x49xf32) <- (6x49x49xf32, 1xi64) + unsqueeze_6 = paddle._C_ops.unsqueeze(transpose_17, full_int_array_0) + del transpose_17 + + # pd_op.add: (-1x6x49x49xf32) <- (-1x6x49x49xf32, 1x6x49x49xf32) + add_18 = paddle._C_ops.add(matmul_14, unsqueeze_6) + del matmul_14, unsqueeze_6 + + # pd_op.softmax: (-1x6x49x49xf32) <- (-1x6x49x49xf32) + softmax_2 = paddle._C_ops.softmax(add_18, -1) + del add_18 + + # pd_op.matmul: (-1x6x49x32xf32) <- (-1x6x49x49xf32, -1x6x49x32xf32) + matmul_15 = paddle._C_ops.matmul(softmax_2, slice_23, False, False) + del slice_23, softmax_2 + + # pd_op.transpose: (-1x49x6x32xf32) <- (-1x6x49x32xf32) + transpose_18 = paddle._C_ops.transpose(matmul_15, [0, 2, 1, 3]) + del matmul_15 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_19 = [slice_20, full_4, full_19] + del slice_20 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_18 = paddle._C_ops.stack(combine_19, 0) + del combine_19 + + # pd_op.reshape: (-1x49x192xf32) <- (-1x49x6x32xf32, 3xi64) + reshape_40 = paddle._C_ops.reshape(transpose_18, stack_18) + del stack_18, transpose_18 + + # pd_op.matmul: (-1x49x192xf32) <- (-1x49x192xf32, 192x192xf32) + matmul_16 = paddle._C_ops.matmul(reshape_40, parameter_125, False, False) + del parameter_125, reshape_40 + + # pd_op.add: (-1x49x192xf32) <- (-1x49x192xf32, 192xf32) + add_19 = paddle._C_ops.add(matmul_16, parameter_124) + del matmul_16, parameter_124 + + # pd_op.reshape: (-1x7x7x192xf32) <- (-1x49x192xf32, 4xi64) + reshape_41 = paddle._C_ops.reshape(add_19, full_int_array_33) + del add_19 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_35 = [-1, 4, 4, 7, 7, 192] + + # pd_op.reshape: (-1x4x4x7x7x192xf32) <- (-1x7x7x192xf32, 6xi64) + reshape_42 = paddle._C_ops.reshape(reshape_41, full_int_array_35) + del reshape_41 + + # pd_op.transpose: (-1x4x7x4x7x192xf32) <- (-1x4x4x7x7x192xf32) + transpose_19 = paddle._C_ops.transpose(reshape_42, [0, 1, 3, 2, 4, 5]) + del reshape_42 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_36 = [-1, 28, 28, 192] + + # pd_op.reshape: (-1x28x28x192xf32) <- (-1x4x7x4x7x192xf32, 4xi64) + reshape_43 = paddle._C_ops.reshape(transpose_19, full_int_array_36) + del transpose_19 + + # pd_op.full: (xi64) <- () + full_22 = paddle._C_ops.full( + [], float("784"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_20 = [slice_17, full_22, full_19] + del slice_17 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_19 = paddle._C_ops.stack(combine_20, 0) + del combine_20 + + # pd_op.reshape: (-1x784x192xf32) <- (-1x28x28x192xf32, 3xi64) + reshape_44 = paddle._C_ops.reshape(reshape_43, stack_19) + del reshape_43, stack_19 + + # pd_op.add: (-1x784x192xf32) <- (-1x-1x192xf32, -1x784x192xf32) + add_20 = paddle._C_ops.add(matmul_12, reshape_44) + del matmul_12, reshape_44 + + # pd_op.layer_norm: (-1x784x192xf32, -1x784xf32, -1x784xf32) <- (-1x784x192xf32, 192xf32, 192xf32) + layer_norm_21, layer_norm_22, layer_norm_23 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_20, parameter_123, parameter_122, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_122, parameter_123 + + # pd_op.matmul: (-1x784x768xf32) <- (-1x784x192xf32, 192x768xf32) + matmul_17 = paddle._C_ops.matmul(layer_norm_21, parameter_121, False, False) + del layer_norm_21, parameter_121 + + # pd_op.add: (-1x784x768xf32) <- (-1x784x768xf32, 768xf32) + add_21 = paddle._C_ops.add(matmul_17, parameter_120) + del matmul_17, parameter_120 + + # pd_op.gelu: (-1x784x768xf32) <- (-1x784x768xf32) + gelu_2 = paddle._C_ops.gelu(add_21, False) + del add_21 + + # pd_op.matmul: (-1x784x192xf32) <- (-1x784x768xf32, 768x192xf32) + matmul_18 = paddle._C_ops.matmul(gelu_2, parameter_119, False, False) + del gelu_2, parameter_119 + + # pd_op.add: (-1x784x192xf32) <- (-1x784x192xf32, 192xf32) + add_22 = paddle._C_ops.add(matmul_18, parameter_118) + del matmul_18, parameter_118 + + # pd_op.add: (-1x784x192xf32) <- (-1x784x192xf32, -1x784x192xf32) + add_23 = paddle._C_ops.add(add_20, add_22) + del add_20, add_22 + + # pd_op.shape64: (3xi64) <- (-1x784x192xf32) + shape64_15 = paddle._C_ops.shape64(add_23) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_24 = paddle._C_ops.slice( + shape64_15, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_15 + + # pd_op.layer_norm: (-1x784x192xf32, -1x784xf32, -1x784xf32) <- (-1x784x192xf32, 192xf32, 192xf32) + layer_norm_24, layer_norm_25, layer_norm_26 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_23, parameter_117, parameter_116, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_116, parameter_117 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_21 = [slice_24, full_18, full_18, full_19] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_20 = paddle._C_ops.stack(combine_21, 0) + del combine_21 + + # pd_op.reshape: (-1x28x28x192xf32) <- (-1x784x192xf32, 4xi64) + reshape_45 = paddle._C_ops.reshape(layer_norm_24, stack_20) + del layer_norm_24, stack_20 + + # pd_op.shape64: (4xi64) <- (-1x28x28x192xf32) + shape64_16 = paddle._C_ops.shape64(reshape_45) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_25 = paddle._C_ops.slice( + shape64_16, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_16 + + # pd_op.roll: (-1x28x28x192xf32) <- (-1x28x28x192xf32, 2xi64) + roll_2 = paddle._C_ops.roll(reshape_45, full_int_array_11, [1, 2]) + del reshape_45 + + # pd_op.shape64: (4xi64) <- (-1x28x28x192xf32) + shape64_17 = paddle._C_ops.shape64(roll_2) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_26 = paddle._C_ops.slice( + shape64_17, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_17 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_22 = [slice_26, full_20, full_3, full_20, full_3, full_19] + del slice_26 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_21 = paddle._C_ops.stack(combine_22, 0) + del combine_22 + + # pd_op.reshape: (-1x4x7x4x7x192xf32) <- (-1x28x28x192xf32, 6xi64) + reshape_46 = paddle._C_ops.reshape(roll_2, stack_21) + del roll_2, stack_21 + + # pd_op.transpose: (-1x4x4x7x7x192xf32) <- (-1x4x7x4x7x192xf32) + transpose_20 = paddle._C_ops.transpose(reshape_46, [0, 1, 3, 2, 4, 5]) + del reshape_46 + + # pd_op.reshape: (-1x7x7x192xf32) <- (-1x4x4x7x7x192xf32, 4xi64) + reshape_47 = paddle._C_ops.reshape(transpose_20, full_int_array_33) + del transpose_20 + + # pd_op.reshape: (-1x49x192xf32) <- (-1x7x7x192xf32, 3xi64) + reshape_48 = paddle._C_ops.reshape(reshape_47, full_int_array_34) + del full_int_array_34, reshape_47 + + # pd_op.full: (1x28x28x1xf32) <- () + full_23 = paddle._C_ops.full( + [1, 28, 28, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__14 = paddle._C_ops.set_value_( + full_23, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_23 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__15 = paddle._C_ops.set_value_( + set_value__14, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__14 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__16 = paddle._C_ops.set_value_( + set_value__15, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__15 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__17 = paddle._C_ops.set_value_( + set_value__16, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__16 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__18 = paddle._C_ops.set_value_( + set_value__17, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__17 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__19 = paddle._C_ops.set_value_( + set_value__18, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__18 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__20 = paddle._C_ops.set_value_( + set_value__19, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__19 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__21 = paddle._C_ops.set_value_( + set_value__20, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__20 + + # pd_op.set_value_: (1x28x28x1xf32) <- (1x28x28x1xf32, 2xi64, 2xi64, 2xi64) + set_value__1 = paddle._C_ops.set_value_( + set_value__21, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__21 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_37 = [1, 4, 7, 4, 7, 1] + + # pd_op.reshape: (1x4x7x4x7x1xf32) <- (1x28x28x1xf32, 6xi64) + reshape_49 = paddle._C_ops.reshape(set_value__1, full_int_array_37) + del full_int_array_37 + + # pd_op.transpose: (1x4x4x7x7x1xf32) <- (1x4x7x4x7x1xf32) + transpose_21 = paddle._C_ops.transpose(reshape_49, [0, 1, 3, 2, 4, 5]) + del reshape_49 + + # pd_op.reshape: (16x7x7x1xf32) <- (1x4x4x7x7x1xf32, 4xi64) + reshape_50 = paddle._C_ops.reshape(transpose_21, full_int_array_27) + del transpose_21 + + # pd_op.reshape: (16x49xf32) <- (16x7x7x1xf32, 2xi64) + reshape_51 = paddle._C_ops.reshape(reshape_50, full_int_array_28) + del reshape_50 + + # pd_op.unsqueeze: (16x1x49xf32) <- (16x49xf32, 1xi64) + unsqueeze_7 = paddle._C_ops.unsqueeze(reshape_51, full_int_array_1) + + # pd_op.unsqueeze: (16x49x1xf32) <- (16x49xf32, 1xi64) + unsqueeze_8 = paddle._C_ops.unsqueeze(reshape_51, full_int_array_5) + del reshape_51 + + # pd_op.subtract: (16x49x49xf32) <- (16x1x49xf32, 16x49x1xf32) + subtract_1 = paddle._C_ops.subtract(unsqueeze_7, unsqueeze_8) + del unsqueeze_7, unsqueeze_8 + + # pd_op.not_equal: (16x49x49xb) <- (16x49x49xf32, xf32) + not_equal_1 = paddle._C_ops.not_equal(subtract_1, full_10) + + # pd_op.full: (16x49x49xf32) <- () + full_24 = paddle._C_ops.full( + [16, 49, 49], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (16x49x49xf32) <- (16x49x49xb, 16x49x49xf32, 16x49x49xf32) + where_2 = paddle._C_ops.where(not_equal_1, full_24, subtract_1) + del full_24, not_equal_1, subtract_1 + + # pd_op.equal: (16x49x49xb) <- (16x49x49xf32, xf32) + equal_1 = paddle._C_ops.equal(where_2, full_10) + + # pd_op.full: (16x49x49xf32) <- () + full_25 = paddle._C_ops.full( + [16, 49, 49], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (16x49x49xf32) <- (16x49x49xb, 16x49x49xf32, 16x49x49xf32) + where_3 = paddle._C_ops.where(equal_1, full_25, where_2) + del equal_1, full_25, where_2 + + # pd_op.shape64: (3xi64) <- (-1x49x192xf32) + shape64_18 = paddle._C_ops.shape64(reshape_48) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_27 = paddle._C_ops.slice( + shape64_18, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_18 + + # pd_op.matmul: (-1x49x576xf32) <- (-1x49x192xf32, 192x576xf32) + matmul_19 = paddle._C_ops.matmul(reshape_48, parameter_115, False, False) + del parameter_115, reshape_48 + + # pd_op.add: (-1x49x576xf32) <- (-1x49x576xf32, 576xf32) + add_24 = paddle._C_ops.add(matmul_19, parameter_114) + del matmul_19, parameter_114 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_23 = [slice_27, full_4, full_5, full_21, full_6] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_22 = paddle._C_ops.stack(combine_23, 0) + del combine_23 + + # pd_op.reshape: (-1x49x3x6x32xf32) <- (-1x49x576xf32, 5xi64) + reshape_52 = paddle._C_ops.reshape(add_24, stack_22) + del add_24, stack_22 + + # pd_op.transpose: (3x-1x6x49x32xf32) <- (-1x49x3x6x32xf32) + transpose_22 = paddle._C_ops.transpose(reshape_52, [2, 0, 3, 1, 4]) + del reshape_52 + + # pd_op.slice: (-1x6x49x32xf32) <- (3x-1x6x49x32xf32, 1xi64, 1xi64) + slice_28 = paddle._C_ops.slice( + transpose_22, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x6x49x32xf32) <- (3x-1x6x49x32xf32, 1xi64, 1xi64) + slice_29 = paddle._C_ops.slice( + transpose_22, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x6x49x32xf32) <- (3x-1x6x49x32xf32, 1xi64, 1xi64) + slice_30 = paddle._C_ops.slice( + transpose_22, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_22 + + # pd_op.scale: (-1x6x49x32xf32) <- (-1x6x49x32xf32, 1xf32) + scale_3 = paddle._C_ops.scale(slice_28, full_7, float("0"), True) + del slice_28 + + # pd_op.transpose: (-1x6x32x49xf32) <- (-1x6x49x32xf32) + transpose_23 = paddle._C_ops.transpose(slice_29, [0, 1, 3, 2]) + del slice_29 + + # pd_op.matmul: (-1x6x49x49xf32) <- (-1x6x49x32xf32, -1x6x32x49xf32) + matmul_20 = paddle._C_ops.matmul(scale_3, transpose_23, False, False) + del scale_3, transpose_23 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_53 = paddle._C_ops.reshape(data_7, full_int_array_7) + del data_7 + + # pd_op.index_select: (2401x6xf32) <- (169x6xf32, 2401xi64) + index_select_3 = paddle._C_ops.index_select(data_8, reshape_53, 0) + del data_8, reshape_53 + + # pd_op.reshape: (49x49x6xf32) <- (2401x6xf32, 3xi64) + reshape_54 = paddle._C_ops.reshape(index_select_3, full_int_array_8) + del index_select_3 + + # pd_op.transpose: (6x49x49xf32) <- (49x49x6xf32) + transpose_24 = paddle._C_ops.transpose(reshape_54, [2, 0, 1]) + del reshape_54 + + # pd_op.unsqueeze: (1x6x49x49xf32) <- (6x49x49xf32, 1xi64) + unsqueeze_9 = paddle._C_ops.unsqueeze(transpose_24, full_int_array_0) + del transpose_24 + + # pd_op.add: (-1x6x49x49xf32) <- (-1x6x49x49xf32, 1x6x49x49xf32) + add_25 = paddle._C_ops.add(matmul_20, unsqueeze_9) + del matmul_20, unsqueeze_9 + + # pd_op.full: (xi64) <- () + full_26 = paddle._C_ops.full( + [], float("16"), paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_1 = paddle._C_ops.floor_divide(slice_27, full_26) + del full_26 + + # pd_op.full: (xi64) <- () + full_27 = paddle._C_ops.full( + [], float("16"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_24 = [floor_divide_1, full_27, full_21, full_4, full_4] + del floor_divide_1, full_27 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_23 = paddle._C_ops.stack(combine_24, 0) + del combine_24 + + # pd_op.reshape: (-1x16x6x49x49xf32) <- (-1x6x49x49xf32, 5xi64) + reshape_55 = paddle._C_ops.reshape(add_25, stack_23) + del add_25, stack_23 + + # pd_op.unsqueeze: (16x1x49x49xf32) <- (16x49x49xf32, 1xi64) + unsqueeze_10 = paddle._C_ops.unsqueeze(where_3, full_int_array_1) + del where_3 + + # pd_op.unsqueeze: (1x16x1x49x49xf32) <- (16x1x49x49xf32, 1xi64) + unsqueeze_11 = paddle._C_ops.unsqueeze(unsqueeze_10, full_int_array_0) + del unsqueeze_10 + + # pd_op.add: (-1x16x6x49x49xf32) <- (-1x16x6x49x49xf32, 1x16x1x49x49xf32) + add_26 = paddle._C_ops.add(reshape_55, unsqueeze_11) + del reshape_55, unsqueeze_11 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_25 = [slice_27, full_21, full_4, full_4] + del full_21 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_24 = paddle._C_ops.stack(combine_25, 0) + del combine_25 + + # pd_op.reshape: (-1x6x49x49xf32) <- (-1x16x6x49x49xf32, 4xi64) + reshape_56 = paddle._C_ops.reshape(add_26, stack_24) + del add_26, stack_24 + + # pd_op.softmax: (-1x6x49x49xf32) <- (-1x6x49x49xf32) + softmax_3 = paddle._C_ops.softmax(reshape_56, -1) + del reshape_56 + + # pd_op.matmul: (-1x6x49x32xf32) <- (-1x6x49x49xf32, -1x6x49x32xf32) + matmul_21 = paddle._C_ops.matmul(softmax_3, slice_30, False, False) + del slice_30, softmax_3 + + # pd_op.transpose: (-1x49x6x32xf32) <- (-1x6x49x32xf32) + transpose_25 = paddle._C_ops.transpose(matmul_21, [0, 2, 1, 3]) + del matmul_21 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_26 = [slice_27, full_4, full_19] + del slice_27 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_25 = paddle._C_ops.stack(combine_26, 0) + del combine_26 + + # pd_op.reshape: (-1x49x192xf32) <- (-1x49x6x32xf32, 3xi64) + reshape_57 = paddle._C_ops.reshape(transpose_25, stack_25) + del stack_25, transpose_25 + + # pd_op.matmul: (-1x49x192xf32) <- (-1x49x192xf32, 192x192xf32) + matmul_22 = paddle._C_ops.matmul(reshape_57, parameter_113, False, False) + del parameter_113, reshape_57 + + # pd_op.add: (-1x49x192xf32) <- (-1x49x192xf32, 192xf32) + add_27 = paddle._C_ops.add(matmul_22, parameter_112) + del matmul_22, parameter_112 + + # pd_op.reshape: (-1x7x7x192xf32) <- (-1x49x192xf32, 4xi64) + reshape_58 = paddle._C_ops.reshape(add_27, full_int_array_33) + del add_27, full_int_array_33 + + # pd_op.reshape: (-1x4x4x7x7x192xf32) <- (-1x7x7x192xf32, 6xi64) + reshape_59 = paddle._C_ops.reshape(reshape_58, full_int_array_35) + del full_int_array_35, reshape_58 + + # pd_op.transpose: (-1x4x7x4x7x192xf32) <- (-1x4x4x7x7x192xf32) + transpose_26 = paddle._C_ops.transpose(reshape_59, [0, 1, 3, 2, 4, 5]) + del reshape_59 + + # pd_op.reshape: (-1x28x28x192xf32) <- (-1x4x7x4x7x192xf32, 4xi64) + reshape_60 = paddle._C_ops.reshape(transpose_26, full_int_array_36) + del full_int_array_36, transpose_26 + + # pd_op.roll: (-1x28x28x192xf32) <- (-1x28x28x192xf32, 2xi64) + roll_3 = paddle._C_ops.roll(reshape_60, full_int_array_29, [1, 2]) + del reshape_60 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_27 = [slice_24, full_22, full_19] + del full_22, slice_24 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_26 = paddle._C_ops.stack(combine_27, 0) + del combine_27 + + # pd_op.reshape: (-1x784x192xf32) <- (-1x28x28x192xf32, 3xi64) + reshape_61 = paddle._C_ops.reshape(roll_3, stack_26) + del roll_3, stack_26 + + # pd_op.add: (-1x784x192xf32) <- (-1x784x192xf32, -1x784x192xf32) + add_28 = paddle._C_ops.add(add_23, reshape_61) + del add_23, reshape_61 + + # pd_op.layer_norm: (-1x784x192xf32, -1x784xf32, -1x784xf32) <- (-1x784x192xf32, 192xf32, 192xf32) + layer_norm_27, layer_norm_28, layer_norm_29 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_28, parameter_111, parameter_110, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_110, parameter_111 + + # pd_op.matmul: (-1x784x768xf32) <- (-1x784x192xf32, 192x768xf32) + matmul_23 = paddle._C_ops.matmul(layer_norm_27, parameter_109, False, False) + del layer_norm_27, parameter_109 + + # pd_op.add: (-1x784x768xf32) <- (-1x784x768xf32, 768xf32) + add_29 = paddle._C_ops.add(matmul_23, parameter_108) + del matmul_23, parameter_108 + + # pd_op.gelu: (-1x784x768xf32) <- (-1x784x768xf32) + gelu_3 = paddle._C_ops.gelu(add_29, False) + del add_29 + + # pd_op.matmul: (-1x784x192xf32) <- (-1x784x768xf32, 768x192xf32) + matmul_24 = paddle._C_ops.matmul(gelu_3, parameter_107, False, False) + del gelu_3, parameter_107 + + # pd_op.add: (-1x784x192xf32) <- (-1x784x192xf32, 192xf32) + add_30 = paddle._C_ops.add(matmul_24, parameter_106) + del matmul_24, parameter_106 + + # pd_op.add: (-1x784x192xf32) <- (-1x784x192xf32, -1x784x192xf32) + add_31 = paddle._C_ops.add(add_28, add_30) + del add_28, add_30 + + # pd_op.shape64: (3xi64) <- (-1x784x192xf32) + shape64_19 = paddle._C_ops.shape64(add_31) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_31 = paddle._C_ops.slice( + shape64_19, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_19 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_28 = [slice_31, full_18, full_18, full_19] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_27 = paddle._C_ops.stack(combine_28, 0) + del combine_28 + + # pd_op.reshape: (-1x28x28x192xf32) <- (-1x784x192xf32, 4xi64) + reshape_62 = paddle._C_ops.reshape(add_31, stack_27) + del add_31, stack_27 + + # pd_op.strided_slice: (-1x14x14x192xf32) <- (-1x28x28x192xf32, 2xi64, 2xi64, 2xi64) + strided_slice_4 = paddle._C_ops.strided_slice( + reshape_62, [1, 2], full_int_array_12, full_int_array_25, full_int_array_30 + ) + + # pd_op.strided_slice: (-1x14x14x192xf32) <- (-1x28x28x192xf32, 2xi64, 2xi64, 2xi64) + strided_slice_5 = paddle._C_ops.strided_slice( + reshape_62, [1, 2], full_int_array_31, full_int_array_25, full_int_array_30 + ) + + # pd_op.strided_slice: (-1x14x14x192xf32) <- (-1x28x28x192xf32, 2xi64, 2xi64, 2xi64) + strided_slice_6 = paddle._C_ops.strided_slice( + reshape_62, [1, 2], full_int_array_32, full_int_array_25, full_int_array_30 + ) + + # pd_op.strided_slice: (-1x14x14x192xf32) <- (-1x28x28x192xf32, 2xi64, 2xi64, 2xi64) + strided_slice_7 = paddle._C_ops.strided_slice( + reshape_62, [1, 2], full_int_array_14, full_int_array_25, full_int_array_30 + ) + + # pd_op.shape64: (4xi64) <- (-1x28x28x192xf32) + shape64_20 = paddle._C_ops.shape64(reshape_62) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_32 = paddle._C_ops.slice( + shape64_20, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_20 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_29 = [slice_32, full_18, full_18, full_19] + del full_18, full_19, slice_32 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_28 = paddle._C_ops.stack(combine_29, 0) + del combine_29 + + # pd_op.reshape: (-1x28x28x192xf32) <- (-1x28x28x192xf32, 4xi64) + reshape_63 = paddle._C_ops.reshape(reshape_62, stack_28) + del reshape_62, stack_28 + + # builtin.combine: ([-1x14x14x192xf32, -1x14x14x192xf32, -1x14x14x192xf32, -1x14x14x192xf32]) <- (-1x14x14x192xf32, -1x14x14x192xf32, -1x14x14x192xf32, -1x14x14x192xf32) + combine_30 = [ + strided_slice_4, + strided_slice_5, + strided_slice_6, + strided_slice_7, + ] + del strided_slice_4, strided_slice_5, strided_slice_6, strided_slice_7 + + # pd_op.concat: (-1x14x14x768xf32) <- ([-1x14x14x192xf32, -1x14x14x192xf32, -1x14x14x192xf32, -1x14x14x192xf32], 1xi32) + concat_1 = paddle._C_ops.concat(combine_30, full_15) + del combine_30 + + # pd_op.full: (xi64) <- () + full_28 = paddle._C_ops.full( + [], float("768"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_31 = [slice_31, full_16, full_28] + del slice_31 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_29 = paddle._C_ops.stack(combine_31, 0) + del combine_31 + + # pd_op.reshape: (-1x-1x768xf32) <- (-1x14x14x768xf32, 3xi64) + reshape_64 = paddle._C_ops.reshape(concat_1, stack_29) + del concat_1, stack_29 + + # pd_op.layer_norm: (-1x-1x768xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x768xf32, 768xf32, 768xf32) + layer_norm_30, layer_norm_31, layer_norm_32 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + reshape_64, parameter_105, parameter_104, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_104, parameter_105, reshape_64 + + # pd_op.matmul: (-1x-1x384xf32) <- (-1x-1x768xf32, 768x384xf32) + matmul_25 = paddle._C_ops.matmul(layer_norm_30, parameter_103, False, False) + del layer_norm_30, parameter_103 + + # pd_op.shape64: (3xi64) <- (-1x-1x384xf32) + shape64_21 = paddle._C_ops.shape64(matmul_25) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_33 = paddle._C_ops.slice( + shape64_21, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_21 + + # pd_op.shape64: (3xi64) <- (-1x-1x384xf32) + shape64_22 = paddle._C_ops.shape64(matmul_25) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_34 = paddle._C_ops.slice( + shape64_22, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + del shape64_22 + + # pd_op.layer_norm: (-1x-1x384xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x384xf32, 384xf32, 384xf32) + layer_norm_33, layer_norm_34, layer_norm_35 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + matmul_25, parameter_102, parameter_101, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_101, parameter_102 + + # pd_op.full: (xi64) <- () + full_29 = paddle._C_ops.full( + [], float("14"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_32 = [slice_33, full_29, full_29, full_17] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_30 = paddle._C_ops.stack(combine_32, 0) + del combine_32 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x-1x384xf32, 4xi64) + reshape_65 = paddle._C_ops.reshape(layer_norm_33, stack_30) + del layer_norm_33, stack_30 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_23 = paddle._C_ops.shape64(reshape_65) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_35 = paddle._C_ops.slice( + shape64_23, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_23 + + # pd_op.full: (xi64) <- () + full_30 = paddle._C_ops.full( + [], float("2"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_33 = [slice_35, full_30, full_3, full_30, full_3, full_17] + del slice_35 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_31 = paddle._C_ops.stack(combine_33, 0) + del combine_33 + + # pd_op.reshape: (-1x2x7x2x7x384xf32) <- (-1x14x14x384xf32, 6xi64) + reshape_66 = paddle._C_ops.reshape(reshape_65, stack_31) + del reshape_65, stack_31 + + # pd_op.transpose: (-1x2x2x7x7x384xf32) <- (-1x2x7x2x7x384xf32) + transpose_27 = paddle._C_ops.transpose(reshape_66, [0, 1, 3, 2, 4, 5]) + del reshape_66 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_38 = [-1, 7, 7, 384] + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x2x2x7x7x384xf32, 4xi64) + reshape_67 = paddle._C_ops.reshape(transpose_27, full_int_array_38) + del transpose_27 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_39 = [-1, 49, 384] + + # pd_op.reshape: (-1x49x384xf32) <- (-1x7x7x384xf32, 3xi64) + reshape_68 = paddle._C_ops.reshape(reshape_67, full_int_array_39) + del reshape_67 + + # pd_op.shape64: (3xi64) <- (-1x49x384xf32) + shape64_24 = paddle._C_ops.shape64(reshape_68) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_36 = paddle._C_ops.slice( + shape64_24, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_24 + + # pd_op.matmul: (-1x49x1152xf32) <- (-1x49x384xf32, 384x1152xf32) + matmul_26 = paddle._C_ops.matmul(reshape_68, parameter_100, False, False) + del parameter_100, reshape_68 + + # pd_op.add: (-1x49x1152xf32) <- (-1x49x1152xf32, 1152xf32) + add_32 = paddle._C_ops.add(matmul_26, parameter_99) + del matmul_26, parameter_99 + + # pd_op.full: (xi64) <- () + full_31 = paddle._C_ops.full( + [], float("12"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_34 = [slice_36, full_4, full_5, full_31, full_6] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_32 = paddle._C_ops.stack(combine_34, 0) + del combine_34 + + # pd_op.reshape: (-1x49x3x12x32xf32) <- (-1x49x1152xf32, 5xi64) + reshape_69 = paddle._C_ops.reshape(add_32, stack_32) + del add_32, stack_32 + + # pd_op.transpose: (3x-1x12x49x32xf32) <- (-1x49x3x12x32xf32) + transpose_28 = paddle._C_ops.transpose(reshape_69, [2, 0, 3, 1, 4]) + del reshape_69 + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_37 = paddle._C_ops.slice( + transpose_28, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_38 = paddle._C_ops.slice( + transpose_28, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_39 = paddle._C_ops.slice( + transpose_28, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_28 + + # pd_op.scale: (-1x12x49x32xf32) <- (-1x12x49x32xf32, 1xf32) + scale_4 = paddle._C_ops.scale(slice_37, full_7, float("0"), True) + del slice_37 + + # pd_op.transpose: (-1x12x32x49xf32) <- (-1x12x49x32xf32) + transpose_29 = paddle._C_ops.transpose(slice_38, [0, 1, 3, 2]) + del slice_38 + + # pd_op.matmul: (-1x12x49x49xf32) <- (-1x12x49x32xf32, -1x12x32x49xf32) + matmul_27 = paddle._C_ops.matmul(scale_4, transpose_29, False, False) + del scale_4, transpose_29 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_70 = paddle._C_ops.reshape(data_9, full_int_array_7) + del data_9 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_4 = paddle._C_ops.index_select(data_10, reshape_70, 0) + del data_10, reshape_70 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_71 = paddle._C_ops.reshape(index_select_4, full_int_array_8) + del index_select_4 + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_30 = paddle._C_ops.transpose(reshape_71, [2, 0, 1]) + del reshape_71 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_12 = paddle._C_ops.unsqueeze(transpose_30, full_int_array_0) + del transpose_30 + + # pd_op.add: (-1x12x49x49xf32) <- (-1x12x49x49xf32, 1x12x49x49xf32) + add_33 = paddle._C_ops.add(matmul_27, unsqueeze_12) + del matmul_27, unsqueeze_12 + + # pd_op.softmax: (-1x12x49x49xf32) <- (-1x12x49x49xf32) + softmax_4 = paddle._C_ops.softmax(add_33, -1) + del add_33 + + # pd_op.matmul: (-1x12x49x32xf32) <- (-1x12x49x49xf32, -1x12x49x32xf32) + matmul_28 = paddle._C_ops.matmul(softmax_4, slice_39, False, False) + del slice_39, softmax_4 + + # pd_op.transpose: (-1x49x12x32xf32) <- (-1x12x49x32xf32) + transpose_31 = paddle._C_ops.transpose(matmul_28, [0, 2, 1, 3]) + del matmul_28 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_35 = [slice_36, full_4, full_17] + del slice_36 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_33 = paddle._C_ops.stack(combine_35, 0) + del combine_35 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x49x12x32xf32, 3xi64) + reshape_72 = paddle._C_ops.reshape(transpose_31, stack_33) + del stack_33, transpose_31 + + # pd_op.matmul: (-1x49x384xf32) <- (-1x49x384xf32, 384x384xf32) + matmul_29 = paddle._C_ops.matmul(reshape_72, parameter_98, False, False) + del parameter_98, reshape_72 + + # pd_op.add: (-1x49x384xf32) <- (-1x49x384xf32, 384xf32) + add_34 = paddle._C_ops.add(matmul_29, parameter_97) + del matmul_29, parameter_97 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x49x384xf32, 4xi64) + reshape_73 = paddle._C_ops.reshape(add_34, full_int_array_38) + del add_34 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_40 = [-1, 2, 2, 7, 7, 384] + + # pd_op.reshape: (-1x2x2x7x7x384xf32) <- (-1x7x7x384xf32, 6xi64) + reshape_74 = paddle._C_ops.reshape(reshape_73, full_int_array_40) + del reshape_73 + + # pd_op.transpose: (-1x2x7x2x7x384xf32) <- (-1x2x2x7x7x384xf32) + transpose_32 = paddle._C_ops.transpose(reshape_74, [0, 1, 3, 2, 4, 5]) + del reshape_74 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_41 = [-1, 14, 14, 384] + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x2x7x2x7x384xf32, 4xi64) + reshape_75 = paddle._C_ops.reshape(transpose_32, full_int_array_41) + del transpose_32 + + # pd_op.full: (xi64) <- () + full_32 = paddle._C_ops.full( + [], float("196"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_36 = [slice_33, full_32, full_17] + del slice_33 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_34 = paddle._C_ops.stack(combine_36, 0) + del combine_36 + + # pd_op.reshape: (-1x196x384xf32) <- (-1x14x14x384xf32, 3xi64) + reshape_76 = paddle._C_ops.reshape(reshape_75, stack_34) + del reshape_75, stack_34 + + # pd_op.add: (-1x196x384xf32) <- (-1x-1x384xf32, -1x196x384xf32) + add_35 = paddle._C_ops.add(matmul_25, reshape_76) + del matmul_25, reshape_76 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_36, layer_norm_37, layer_norm_38 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_35, parameter_96, parameter_95, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_95, parameter_96 + + # pd_op.matmul: (-1x196x1536xf32) <- (-1x196x384xf32, 384x1536xf32) + matmul_30 = paddle._C_ops.matmul(layer_norm_36, parameter_94, False, False) + del layer_norm_36, parameter_94 + + # pd_op.add: (-1x196x1536xf32) <- (-1x196x1536xf32, 1536xf32) + add_36 = paddle._C_ops.add(matmul_30, parameter_93) + del matmul_30, parameter_93 + + # pd_op.gelu: (-1x196x1536xf32) <- (-1x196x1536xf32) + gelu_4 = paddle._C_ops.gelu(add_36, False) + del add_36 + + # pd_op.matmul: (-1x196x384xf32) <- (-1x196x1536xf32, 1536x384xf32) + matmul_31 = paddle._C_ops.matmul(gelu_4, parameter_92, False, False) + del gelu_4, parameter_92 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, 384xf32) + add_37 = paddle._C_ops.add(matmul_31, parameter_91) + del matmul_31, parameter_91 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_38 = paddle._C_ops.add(add_35, add_37) + del add_35, add_37 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_25 = paddle._C_ops.shape64(add_38) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_40 = paddle._C_ops.slice( + shape64_25, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_25 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_39, layer_norm_40, layer_norm_41 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_38, parameter_90, parameter_89, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_89, parameter_90 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_37 = [slice_40, full_29, full_29, full_17] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_35 = paddle._C_ops.stack(combine_37, 0) + del combine_37 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x196x384xf32, 4xi64) + reshape_77 = paddle._C_ops.reshape(layer_norm_39, stack_35) + del layer_norm_39, stack_35 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_26 = paddle._C_ops.shape64(reshape_77) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_41 = paddle._C_ops.slice( + shape64_26, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_26 + + # pd_op.roll: (-1x14x14x384xf32) <- (-1x14x14x384xf32, 2xi64) + roll_4 = paddle._C_ops.roll(reshape_77, full_int_array_11, [1, 2]) + del reshape_77 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_27 = paddle._C_ops.shape64(roll_4) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_42 = paddle._C_ops.slice( + shape64_27, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_27 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_38 = [slice_42, full_30, full_3, full_30, full_3, full_17] + del slice_42 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_36 = paddle._C_ops.stack(combine_38, 0) + del combine_38 + + # pd_op.reshape: (-1x2x7x2x7x384xf32) <- (-1x14x14x384xf32, 6xi64) + reshape_78 = paddle._C_ops.reshape(roll_4, stack_36) + del roll_4, stack_36 + + # pd_op.transpose: (-1x2x2x7x7x384xf32) <- (-1x2x7x2x7x384xf32) + transpose_33 = paddle._C_ops.transpose(reshape_78, [0, 1, 3, 2, 4, 5]) + del reshape_78 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x2x2x7x7x384xf32, 4xi64) + reshape_79 = paddle._C_ops.reshape(transpose_33, full_int_array_38) + del transpose_33 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x7x7x384xf32, 3xi64) + reshape_80 = paddle._C_ops.reshape(reshape_79, full_int_array_39) + del reshape_79 + + # pd_op.full: (1x14x14x1xf32) <- () + full_33 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__22 = paddle._C_ops.set_value_( + full_33, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_33 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__23 = paddle._C_ops.set_value_( + set_value__22, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__22 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__24 = paddle._C_ops.set_value_( + set_value__23, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__23 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__25 = paddle._C_ops.set_value_( + set_value__24, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__24 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__26 = paddle._C_ops.set_value_( + set_value__25, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__25 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__27 = paddle._C_ops.set_value_( + set_value__26, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__26 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__28 = paddle._C_ops.set_value_( + set_value__27, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__27 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__29 = paddle._C_ops.set_value_( + set_value__28, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__28 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__2 = paddle._C_ops.set_value_( + set_value__29, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__29 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_42 = [1, 2, 7, 2, 7, 1] + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_81 = paddle._C_ops.reshape(set_value__2, full_int_array_42) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_34 = paddle._C_ops.transpose(reshape_81, [0, 1, 3, 2, 4, 5]) + del reshape_81 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_82 = paddle._C_ops.reshape(transpose_34, full_int_array_27) + del transpose_34 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_83 = paddle._C_ops.reshape(reshape_82, full_int_array_28) + del reshape_82 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_13 = paddle._C_ops.unsqueeze(reshape_83, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_14 = paddle._C_ops.unsqueeze(reshape_83, full_int_array_5) + del reshape_83 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_2 = paddle._C_ops.subtract(unsqueeze_13, unsqueeze_14) + del unsqueeze_13, unsqueeze_14 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_2 = paddle._C_ops.not_equal(subtract_2, full_10) + + # pd_op.full: (4x49x49xf32) <- () + full_34 = paddle._C_ops.full( + [4, 49, 49], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_4 = paddle._C_ops.where(not_equal_2, full_34, subtract_2) + del not_equal_2, subtract_2 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_2 = paddle._C_ops.equal(where_4, full_10) + + # pd_op.full: (4x49x49xf32) <- () + full_35 = paddle._C_ops.full( + [4, 49, 49], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_5 = paddle._C_ops.where(equal_2, full_35, where_4) + del equal_2, where_4 + + # pd_op.shape64: (3xi64) <- (-1x49x384xf32) + shape64_28 = paddle._C_ops.shape64(reshape_80) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_43 = paddle._C_ops.slice( + shape64_28, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_28 + + # pd_op.matmul: (-1x49x1152xf32) <- (-1x49x384xf32, 384x1152xf32) + matmul_32 = paddle._C_ops.matmul(reshape_80, parameter_88, False, False) + del parameter_88, reshape_80 + + # pd_op.add: (-1x49x1152xf32) <- (-1x49x1152xf32, 1152xf32) + add_39 = paddle._C_ops.add(matmul_32, parameter_87) + del matmul_32, parameter_87 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_39 = [slice_43, full_4, full_5, full_31, full_6] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_37 = paddle._C_ops.stack(combine_39, 0) + del combine_39 + + # pd_op.reshape: (-1x49x3x12x32xf32) <- (-1x49x1152xf32, 5xi64) + reshape_84 = paddle._C_ops.reshape(add_39, stack_37) + del add_39, stack_37 + + # pd_op.transpose: (3x-1x12x49x32xf32) <- (-1x49x3x12x32xf32) + transpose_35 = paddle._C_ops.transpose(reshape_84, [2, 0, 3, 1, 4]) + del reshape_84 + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_44 = paddle._C_ops.slice( + transpose_35, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_45 = paddle._C_ops.slice( + transpose_35, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_46 = paddle._C_ops.slice( + transpose_35, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_35 + + # pd_op.scale: (-1x12x49x32xf32) <- (-1x12x49x32xf32, 1xf32) + scale_5 = paddle._C_ops.scale(slice_44, full_7, float("0"), True) + del slice_44 + + # pd_op.transpose: (-1x12x32x49xf32) <- (-1x12x49x32xf32) + transpose_36 = paddle._C_ops.transpose(slice_45, [0, 1, 3, 2]) + del slice_45 + + # pd_op.matmul: (-1x12x49x49xf32) <- (-1x12x49x32xf32, -1x12x32x49xf32) + matmul_33 = paddle._C_ops.matmul(scale_5, transpose_36, False, False) + del scale_5, transpose_36 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_85 = paddle._C_ops.reshape(data_11, full_int_array_7) + del data_11 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_5 = paddle._C_ops.index_select(data_12, reshape_85, 0) + del data_12, reshape_85 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_86 = paddle._C_ops.reshape(index_select_5, full_int_array_8) + del index_select_5 + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_37 = paddle._C_ops.transpose(reshape_86, [2, 0, 1]) + del reshape_86 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_15 = paddle._C_ops.unsqueeze(transpose_37, full_int_array_0) + del transpose_37 + + # pd_op.add: (-1x12x49x49xf32) <- (-1x12x49x49xf32, 1x12x49x49xf32) + add_40 = paddle._C_ops.add(matmul_33, unsqueeze_15) + del matmul_33, unsqueeze_15 + + # pd_op.full: (xi64) <- () + full_36 = paddle._C_ops.full( + [], float("4"), paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_2 = paddle._C_ops.floor_divide(slice_43, full_36) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_40 = [floor_divide_2, full_20, full_31, full_4, full_4] + del floor_divide_2 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_38 = paddle._C_ops.stack(combine_40, 0) + del combine_40 + + # pd_op.reshape: (-1x4x12x49x49xf32) <- (-1x12x49x49xf32, 5xi64) + reshape_87 = paddle._C_ops.reshape(add_40, stack_38) + del add_40, stack_38 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_16 = paddle._C_ops.unsqueeze(where_5, full_int_array_1) + del where_5 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_17 = paddle._C_ops.unsqueeze(unsqueeze_16, full_int_array_0) + del unsqueeze_16 + + # pd_op.add: (-1x4x12x49x49xf32) <- (-1x4x12x49x49xf32, 1x4x1x49x49xf32) + add_41 = paddle._C_ops.add(reshape_87, unsqueeze_17) + del reshape_87, unsqueeze_17 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_41 = [slice_43, full_31, full_4, full_4] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_39 = paddle._C_ops.stack(combine_41, 0) + del combine_41 + + # pd_op.reshape: (-1x12x49x49xf32) <- (-1x4x12x49x49xf32, 4xi64) + reshape_88 = paddle._C_ops.reshape(add_41, stack_39) + del add_41, stack_39 + + # pd_op.softmax: (-1x12x49x49xf32) <- (-1x12x49x49xf32) + softmax_5 = paddle._C_ops.softmax(reshape_88, -1) + del reshape_88 + + # pd_op.matmul: (-1x12x49x32xf32) <- (-1x12x49x49xf32, -1x12x49x32xf32) + matmul_34 = paddle._C_ops.matmul(softmax_5, slice_46, False, False) + del slice_46, softmax_5 + + # pd_op.transpose: (-1x49x12x32xf32) <- (-1x12x49x32xf32) + transpose_38 = paddle._C_ops.transpose(matmul_34, [0, 2, 1, 3]) + del matmul_34 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_42 = [slice_43, full_4, full_17] + del slice_43 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_40 = paddle._C_ops.stack(combine_42, 0) + del combine_42 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x49x12x32xf32, 3xi64) + reshape_89 = paddle._C_ops.reshape(transpose_38, stack_40) + del stack_40, transpose_38 + + # pd_op.matmul: (-1x49x384xf32) <- (-1x49x384xf32, 384x384xf32) + matmul_35 = paddle._C_ops.matmul(reshape_89, parameter_86, False, False) + del parameter_86, reshape_89 + + # pd_op.add: (-1x49x384xf32) <- (-1x49x384xf32, 384xf32) + add_42 = paddle._C_ops.add(matmul_35, parameter_85) + del matmul_35, parameter_85 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x49x384xf32, 4xi64) + reshape_90 = paddle._C_ops.reshape(add_42, full_int_array_38) + del add_42 + + # pd_op.reshape: (-1x2x2x7x7x384xf32) <- (-1x7x7x384xf32, 6xi64) + reshape_91 = paddle._C_ops.reshape(reshape_90, full_int_array_40) + del reshape_90 + + # pd_op.transpose: (-1x2x7x2x7x384xf32) <- (-1x2x2x7x7x384xf32) + transpose_39 = paddle._C_ops.transpose(reshape_91, [0, 1, 3, 2, 4, 5]) + del reshape_91 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x2x7x2x7x384xf32, 4xi64) + reshape_92 = paddle._C_ops.reshape(transpose_39, full_int_array_41) + del transpose_39 + + # pd_op.roll: (-1x14x14x384xf32) <- (-1x14x14x384xf32, 2xi64) + roll_5 = paddle._C_ops.roll(reshape_92, full_int_array_29, [1, 2]) + del reshape_92 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_43 = [slice_40, full_32, full_17] + del slice_40 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_41 = paddle._C_ops.stack(combine_43, 0) + del combine_43 + + # pd_op.reshape: (-1x196x384xf32) <- (-1x14x14x384xf32, 3xi64) + reshape_93 = paddle._C_ops.reshape(roll_5, stack_41) + del roll_5, stack_41 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_43 = paddle._C_ops.add(add_38, reshape_93) + del add_38, reshape_93 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_42, layer_norm_43, layer_norm_44 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_43, parameter_84, parameter_83, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_83, parameter_84 + + # pd_op.matmul: (-1x196x1536xf32) <- (-1x196x384xf32, 384x1536xf32) + matmul_36 = paddle._C_ops.matmul(layer_norm_42, parameter_82, False, False) + del layer_norm_42, parameter_82 + + # pd_op.add: (-1x196x1536xf32) <- (-1x196x1536xf32, 1536xf32) + add_44 = paddle._C_ops.add(matmul_36, parameter_81) + del matmul_36, parameter_81 + + # pd_op.gelu: (-1x196x1536xf32) <- (-1x196x1536xf32) + gelu_5 = paddle._C_ops.gelu(add_44, False) + del add_44 + + # pd_op.matmul: (-1x196x384xf32) <- (-1x196x1536xf32, 1536x384xf32) + matmul_37 = paddle._C_ops.matmul(gelu_5, parameter_80, False, False) + del gelu_5, parameter_80 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, 384xf32) + add_45 = paddle._C_ops.add(matmul_37, parameter_79) + del matmul_37, parameter_79 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_46 = paddle._C_ops.add(add_43, add_45) + del add_43, add_45 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_29 = paddle._C_ops.shape64(add_46) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_47 = paddle._C_ops.slice( + shape64_29, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_29 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_45, layer_norm_46, layer_norm_47 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_46, parameter_78, parameter_77, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_77, parameter_78 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_44 = [slice_47, full_29, full_29, full_17] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_42 = paddle._C_ops.stack(combine_44, 0) + del combine_44 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x196x384xf32, 4xi64) + reshape_94 = paddle._C_ops.reshape(layer_norm_45, stack_42) + del layer_norm_45, stack_42 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_30 = paddle._C_ops.shape64(reshape_94) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_48 = paddle._C_ops.slice( + shape64_30, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_30 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_45 = [slice_48, full_30, full_3, full_30, full_3, full_17] + del slice_48 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_43 = paddle._C_ops.stack(combine_45, 0) + del combine_45 + + # pd_op.reshape: (-1x2x7x2x7x384xf32) <- (-1x14x14x384xf32, 6xi64) + reshape_95 = paddle._C_ops.reshape(reshape_94, stack_43) + del reshape_94, stack_43 + + # pd_op.transpose: (-1x2x2x7x7x384xf32) <- (-1x2x7x2x7x384xf32) + transpose_40 = paddle._C_ops.transpose(reshape_95, [0, 1, 3, 2, 4, 5]) + del reshape_95 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x2x2x7x7x384xf32, 4xi64) + reshape_96 = paddle._C_ops.reshape(transpose_40, full_int_array_38) + del transpose_40 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x7x7x384xf32, 3xi64) + reshape_97 = paddle._C_ops.reshape(reshape_96, full_int_array_39) + del reshape_96 + + # pd_op.shape64: (3xi64) <- (-1x49x384xf32) + shape64_31 = paddle._C_ops.shape64(reshape_97) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_49 = paddle._C_ops.slice( + shape64_31, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_31 + + # pd_op.matmul: (-1x49x1152xf32) <- (-1x49x384xf32, 384x1152xf32) + matmul_38 = paddle._C_ops.matmul(reshape_97, parameter_76, False, False) + del parameter_76, reshape_97 + + # pd_op.add: (-1x49x1152xf32) <- (-1x49x1152xf32, 1152xf32) + add_47 = paddle._C_ops.add(matmul_38, parameter_75) + del matmul_38, parameter_75 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_46 = [slice_49, full_4, full_5, full_31, full_6] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_44 = paddle._C_ops.stack(combine_46, 0) + del combine_46 + + # pd_op.reshape: (-1x49x3x12x32xf32) <- (-1x49x1152xf32, 5xi64) + reshape_98 = paddle._C_ops.reshape(add_47, stack_44) + del add_47, stack_44 + + # pd_op.transpose: (3x-1x12x49x32xf32) <- (-1x49x3x12x32xf32) + transpose_41 = paddle._C_ops.transpose(reshape_98, [2, 0, 3, 1, 4]) + del reshape_98 + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_50 = paddle._C_ops.slice( + transpose_41, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_51 = paddle._C_ops.slice( + transpose_41, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_52 = paddle._C_ops.slice( + transpose_41, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_41 + + # pd_op.scale: (-1x12x49x32xf32) <- (-1x12x49x32xf32, 1xf32) + scale_6 = paddle._C_ops.scale(slice_50, full_7, float("0"), True) + del slice_50 + + # pd_op.transpose: (-1x12x32x49xf32) <- (-1x12x49x32xf32) + transpose_42 = paddle._C_ops.transpose(slice_51, [0, 1, 3, 2]) + del slice_51 + + # pd_op.matmul: (-1x12x49x49xf32) <- (-1x12x49x32xf32, -1x12x32x49xf32) + matmul_39 = paddle._C_ops.matmul(scale_6, transpose_42, False, False) + del scale_6, transpose_42 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_99 = paddle._C_ops.reshape(data_13, full_int_array_7) + del data_13 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_6 = paddle._C_ops.index_select(data_14, reshape_99, 0) + del data_14, reshape_99 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_100 = paddle._C_ops.reshape(index_select_6, full_int_array_8) + del index_select_6 + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_43 = paddle._C_ops.transpose(reshape_100, [2, 0, 1]) + del reshape_100 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_18 = paddle._C_ops.unsqueeze(transpose_43, full_int_array_0) + del transpose_43 + + # pd_op.add: (-1x12x49x49xf32) <- (-1x12x49x49xf32, 1x12x49x49xf32) + add_48 = paddle._C_ops.add(matmul_39, unsqueeze_18) + del matmul_39, unsqueeze_18 + + # pd_op.softmax: (-1x12x49x49xf32) <- (-1x12x49x49xf32) + softmax_6 = paddle._C_ops.softmax(add_48, -1) + del add_48 + + # pd_op.matmul: (-1x12x49x32xf32) <- (-1x12x49x49xf32, -1x12x49x32xf32) + matmul_40 = paddle._C_ops.matmul(softmax_6, slice_52, False, False) + del slice_52, softmax_6 + + # pd_op.transpose: (-1x49x12x32xf32) <- (-1x12x49x32xf32) + transpose_44 = paddle._C_ops.transpose(matmul_40, [0, 2, 1, 3]) + del matmul_40 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_47 = [slice_49, full_4, full_17] + del slice_49 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_45 = paddle._C_ops.stack(combine_47, 0) + del combine_47 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x49x12x32xf32, 3xi64) + reshape_101 = paddle._C_ops.reshape(transpose_44, stack_45) + del stack_45, transpose_44 + + # pd_op.matmul: (-1x49x384xf32) <- (-1x49x384xf32, 384x384xf32) + matmul_41 = paddle._C_ops.matmul(reshape_101, parameter_74, False, False) + del parameter_74, reshape_101 + + # pd_op.add: (-1x49x384xf32) <- (-1x49x384xf32, 384xf32) + add_49 = paddle._C_ops.add(matmul_41, parameter_73) + del matmul_41, parameter_73 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x49x384xf32, 4xi64) + reshape_102 = paddle._C_ops.reshape(add_49, full_int_array_38) + del add_49 + + # pd_op.reshape: (-1x2x2x7x7x384xf32) <- (-1x7x7x384xf32, 6xi64) + reshape_103 = paddle._C_ops.reshape(reshape_102, full_int_array_40) + del reshape_102 + + # pd_op.transpose: (-1x2x7x2x7x384xf32) <- (-1x2x2x7x7x384xf32) + transpose_45 = paddle._C_ops.transpose(reshape_103, [0, 1, 3, 2, 4, 5]) + del reshape_103 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x2x7x2x7x384xf32, 4xi64) + reshape_104 = paddle._C_ops.reshape(transpose_45, full_int_array_41) + del transpose_45 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_48 = [slice_47, full_32, full_17] + del slice_47 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_46 = paddle._C_ops.stack(combine_48, 0) + del combine_48 + + # pd_op.reshape: (-1x196x384xf32) <- (-1x14x14x384xf32, 3xi64) + reshape_105 = paddle._C_ops.reshape(reshape_104, stack_46) + del reshape_104, stack_46 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_50 = paddle._C_ops.add(add_46, reshape_105) + del add_46, reshape_105 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_48, layer_norm_49, layer_norm_50 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_50, parameter_72, parameter_71, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_71, parameter_72 + + # pd_op.matmul: (-1x196x1536xf32) <- (-1x196x384xf32, 384x1536xf32) + matmul_42 = paddle._C_ops.matmul(layer_norm_48, parameter_70, False, False) + del layer_norm_48, parameter_70 + + # pd_op.add: (-1x196x1536xf32) <- (-1x196x1536xf32, 1536xf32) + add_51 = paddle._C_ops.add(matmul_42, parameter_69) + del matmul_42, parameter_69 + + # pd_op.gelu: (-1x196x1536xf32) <- (-1x196x1536xf32) + gelu_6 = paddle._C_ops.gelu(add_51, False) + del add_51 + + # pd_op.matmul: (-1x196x384xf32) <- (-1x196x1536xf32, 1536x384xf32) + matmul_43 = paddle._C_ops.matmul(gelu_6, parameter_68, False, False) + del gelu_6, parameter_68 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, 384xf32) + add_52 = paddle._C_ops.add(matmul_43, parameter_67) + del matmul_43, parameter_67 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_53 = paddle._C_ops.add(add_50, add_52) + del add_50, add_52 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_32 = paddle._C_ops.shape64(add_53) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_53 = paddle._C_ops.slice( + shape64_32, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_32 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_51, layer_norm_52, layer_norm_53 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_53, parameter_66, parameter_65, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_65, parameter_66 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_49 = [slice_53, full_29, full_29, full_17] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_47 = paddle._C_ops.stack(combine_49, 0) + del combine_49 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x196x384xf32, 4xi64) + reshape_106 = paddle._C_ops.reshape(layer_norm_51, stack_47) + del layer_norm_51, stack_47 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_33 = paddle._C_ops.shape64(reshape_106) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_54 = paddle._C_ops.slice( + shape64_33, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_33 + + # pd_op.roll: (-1x14x14x384xf32) <- (-1x14x14x384xf32, 2xi64) + roll_6 = paddle._C_ops.roll(reshape_106, full_int_array_11, [1, 2]) + del reshape_106 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_34 = paddle._C_ops.shape64(roll_6) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_55 = paddle._C_ops.slice( + shape64_34, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_34 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_50 = [slice_55, full_30, full_3, full_30, full_3, full_17] + del slice_55 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_48 = paddle._C_ops.stack(combine_50, 0) + del combine_50 + + # pd_op.reshape: (-1x2x7x2x7x384xf32) <- (-1x14x14x384xf32, 6xi64) + reshape_107 = paddle._C_ops.reshape(roll_6, stack_48) + del roll_6, stack_48 + + # pd_op.transpose: (-1x2x2x7x7x384xf32) <- (-1x2x7x2x7x384xf32) + transpose_46 = paddle._C_ops.transpose(reshape_107, [0, 1, 3, 2, 4, 5]) + del reshape_107 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x2x2x7x7x384xf32, 4xi64) + reshape_108 = paddle._C_ops.reshape(transpose_46, full_int_array_38) + del transpose_46 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x7x7x384xf32, 3xi64) + reshape_109 = paddle._C_ops.reshape(reshape_108, full_int_array_39) + del reshape_108 + + # pd_op.full: (1x14x14x1xf32) <- () + full_37 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__30 = paddle._C_ops.set_value_( + full_37, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_37 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__31 = paddle._C_ops.set_value_( + set_value__30, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__30 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__32 = paddle._C_ops.set_value_( + set_value__31, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__31 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__33 = paddle._C_ops.set_value_( + set_value__32, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__32 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__34 = paddle._C_ops.set_value_( + set_value__33, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__33 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__35 = paddle._C_ops.set_value_( + set_value__34, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__34 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__36 = paddle._C_ops.set_value_( + set_value__35, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__35 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__37 = paddle._C_ops.set_value_( + set_value__36, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__36 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__3 = paddle._C_ops.set_value_( + set_value__37, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__37 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_110 = paddle._C_ops.reshape(set_value__3, full_int_array_42) + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_47 = paddle._C_ops.transpose(reshape_110, [0, 1, 3, 2, 4, 5]) + del reshape_110 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_111 = paddle._C_ops.reshape(transpose_47, full_int_array_27) + del transpose_47 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_112 = paddle._C_ops.reshape(reshape_111, full_int_array_28) + del reshape_111 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_19 = paddle._C_ops.unsqueeze(reshape_112, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_20 = paddle._C_ops.unsqueeze(reshape_112, full_int_array_5) + del reshape_112 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_3 = paddle._C_ops.subtract(unsqueeze_19, unsqueeze_20) + del unsqueeze_19, unsqueeze_20 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_3 = paddle._C_ops.not_equal(subtract_3, full_10) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_6 = paddle._C_ops.where(not_equal_3, full_34, subtract_3) + del not_equal_3, subtract_3 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_3 = paddle._C_ops.equal(where_6, full_10) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_7 = paddle._C_ops.where(equal_3, full_35, where_6) + del equal_3, where_6 + + # pd_op.shape64: (3xi64) <- (-1x49x384xf32) + shape64_35 = paddle._C_ops.shape64(reshape_109) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_56 = paddle._C_ops.slice( + shape64_35, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_35 + + # pd_op.matmul: (-1x49x1152xf32) <- (-1x49x384xf32, 384x1152xf32) + matmul_44 = paddle._C_ops.matmul(reshape_109, parameter_64, False, False) + del parameter_64, reshape_109 + + # pd_op.add: (-1x49x1152xf32) <- (-1x49x1152xf32, 1152xf32) + add_54 = paddle._C_ops.add(matmul_44, parameter_63) + del matmul_44, parameter_63 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_51 = [slice_56, full_4, full_5, full_31, full_6] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_49 = paddle._C_ops.stack(combine_51, 0) + del combine_51 + + # pd_op.reshape: (-1x49x3x12x32xf32) <- (-1x49x1152xf32, 5xi64) + reshape_113 = paddle._C_ops.reshape(add_54, stack_49) + del add_54, stack_49 + + # pd_op.transpose: (3x-1x12x49x32xf32) <- (-1x49x3x12x32xf32) + transpose_48 = paddle._C_ops.transpose(reshape_113, [2, 0, 3, 1, 4]) + del reshape_113 + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_57 = paddle._C_ops.slice( + transpose_48, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_58 = paddle._C_ops.slice( + transpose_48, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_59 = paddle._C_ops.slice( + transpose_48, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_48 + + # pd_op.scale: (-1x12x49x32xf32) <- (-1x12x49x32xf32, 1xf32) + scale_7 = paddle._C_ops.scale(slice_57, full_7, float("0"), True) + del slice_57 + + # pd_op.transpose: (-1x12x32x49xf32) <- (-1x12x49x32xf32) + transpose_49 = paddle._C_ops.transpose(slice_58, [0, 1, 3, 2]) + del slice_58 + + # pd_op.matmul: (-1x12x49x49xf32) <- (-1x12x49x32xf32, -1x12x32x49xf32) + matmul_45 = paddle._C_ops.matmul(scale_7, transpose_49, False, False) + del scale_7, transpose_49 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_114 = paddle._C_ops.reshape(data_15, full_int_array_7) + del data_15 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_7 = paddle._C_ops.index_select(data_16, reshape_114, 0) + del data_16, reshape_114 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_115 = paddle._C_ops.reshape(index_select_7, full_int_array_8) + del index_select_7 + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_50 = paddle._C_ops.transpose(reshape_115, [2, 0, 1]) + del reshape_115 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_21 = paddle._C_ops.unsqueeze(transpose_50, full_int_array_0) + del transpose_50 + + # pd_op.add: (-1x12x49x49xf32) <- (-1x12x49x49xf32, 1x12x49x49xf32) + add_55 = paddle._C_ops.add(matmul_45, unsqueeze_21) + del matmul_45, unsqueeze_21 + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_3 = paddle._C_ops.floor_divide(slice_56, full_36) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_52 = [floor_divide_3, full_20, full_31, full_4, full_4] + del floor_divide_3 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_50 = paddle._C_ops.stack(combine_52, 0) + del combine_52 + + # pd_op.reshape: (-1x4x12x49x49xf32) <- (-1x12x49x49xf32, 5xi64) + reshape_116 = paddle._C_ops.reshape(add_55, stack_50) + del add_55, stack_50 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_22 = paddle._C_ops.unsqueeze(where_7, full_int_array_1) + del where_7 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_23 = paddle._C_ops.unsqueeze(unsqueeze_22, full_int_array_0) + del unsqueeze_22 + + # pd_op.add: (-1x4x12x49x49xf32) <- (-1x4x12x49x49xf32, 1x4x1x49x49xf32) + add_56 = paddle._C_ops.add(reshape_116, unsqueeze_23) + del reshape_116, unsqueeze_23 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_53 = [slice_56, full_31, full_4, full_4] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_51 = paddle._C_ops.stack(combine_53, 0) + del combine_53 + + # pd_op.reshape: (-1x12x49x49xf32) <- (-1x4x12x49x49xf32, 4xi64) + reshape_117 = paddle._C_ops.reshape(add_56, stack_51) + del add_56, stack_51 + + # pd_op.softmax: (-1x12x49x49xf32) <- (-1x12x49x49xf32) + softmax_7 = paddle._C_ops.softmax(reshape_117, -1) + del reshape_117 + + # pd_op.matmul: (-1x12x49x32xf32) <- (-1x12x49x49xf32, -1x12x49x32xf32) + matmul_46 = paddle._C_ops.matmul(softmax_7, slice_59, False, False) + del slice_59, softmax_7 + + # pd_op.transpose: (-1x49x12x32xf32) <- (-1x12x49x32xf32) + transpose_51 = paddle._C_ops.transpose(matmul_46, [0, 2, 1, 3]) + del matmul_46 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_54 = [slice_56, full_4, full_17] + del slice_56 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_52 = paddle._C_ops.stack(combine_54, 0) + del combine_54 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x49x12x32xf32, 3xi64) + reshape_118 = paddle._C_ops.reshape(transpose_51, stack_52) + del stack_52, transpose_51 + + # pd_op.matmul: (-1x49x384xf32) <- (-1x49x384xf32, 384x384xf32) + matmul_47 = paddle._C_ops.matmul(reshape_118, parameter_62, False, False) + del parameter_62, reshape_118 + + # pd_op.add: (-1x49x384xf32) <- (-1x49x384xf32, 384xf32) + add_57 = paddle._C_ops.add(matmul_47, parameter_61) + del matmul_47, parameter_61 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x49x384xf32, 4xi64) + reshape_119 = paddle._C_ops.reshape(add_57, full_int_array_38) + del add_57 + + # pd_op.reshape: (-1x2x2x7x7x384xf32) <- (-1x7x7x384xf32, 6xi64) + reshape_120 = paddle._C_ops.reshape(reshape_119, full_int_array_40) + del reshape_119 + + # pd_op.transpose: (-1x2x7x2x7x384xf32) <- (-1x2x2x7x7x384xf32) + transpose_52 = paddle._C_ops.transpose(reshape_120, [0, 1, 3, 2, 4, 5]) + del reshape_120 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x2x7x2x7x384xf32, 4xi64) + reshape_121 = paddle._C_ops.reshape(transpose_52, full_int_array_41) + del transpose_52 + + # pd_op.roll: (-1x14x14x384xf32) <- (-1x14x14x384xf32, 2xi64) + roll_7 = paddle._C_ops.roll(reshape_121, full_int_array_29, [1, 2]) + del reshape_121 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_55 = [slice_53, full_32, full_17] + del slice_53 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_53 = paddle._C_ops.stack(combine_55, 0) + del combine_55 + + # pd_op.reshape: (-1x196x384xf32) <- (-1x14x14x384xf32, 3xi64) + reshape_122 = paddle._C_ops.reshape(roll_7, stack_53) + del roll_7, stack_53 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_58 = paddle._C_ops.add(add_53, reshape_122) + del add_53, reshape_122 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_54, layer_norm_55, layer_norm_56 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_58, parameter_60, parameter_59, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_59, parameter_60 + + # pd_op.matmul: (-1x196x1536xf32) <- (-1x196x384xf32, 384x1536xf32) + matmul_48 = paddle._C_ops.matmul(layer_norm_54, parameter_58, False, False) + del layer_norm_54, parameter_58 + + # pd_op.add: (-1x196x1536xf32) <- (-1x196x1536xf32, 1536xf32) + add_59 = paddle._C_ops.add(matmul_48, parameter_57) + del matmul_48, parameter_57 + + # pd_op.gelu: (-1x196x1536xf32) <- (-1x196x1536xf32) + gelu_7 = paddle._C_ops.gelu(add_59, False) + del add_59 + + # pd_op.matmul: (-1x196x384xf32) <- (-1x196x1536xf32, 1536x384xf32) + matmul_49 = paddle._C_ops.matmul(gelu_7, parameter_56, False, False) + del gelu_7, parameter_56 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, 384xf32) + add_60 = paddle._C_ops.add(matmul_49, parameter_55) + del matmul_49, parameter_55 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_61 = paddle._C_ops.add(add_58, add_60) + del add_58, add_60 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_36 = paddle._C_ops.shape64(add_61) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_60 = paddle._C_ops.slice( + shape64_36, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_36 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_57, layer_norm_58, layer_norm_59 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_61, parameter_54, parameter_53, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_53, parameter_54 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_56 = [slice_60, full_29, full_29, full_17] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_54 = paddle._C_ops.stack(combine_56, 0) + del combine_56 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x196x384xf32, 4xi64) + reshape_123 = paddle._C_ops.reshape(layer_norm_57, stack_54) + del layer_norm_57, stack_54 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_37 = paddle._C_ops.shape64(reshape_123) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_61 = paddle._C_ops.slice( + shape64_37, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_37 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_57 = [slice_61, full_30, full_3, full_30, full_3, full_17] + del slice_61 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_55 = paddle._C_ops.stack(combine_57, 0) + del combine_57 + + # pd_op.reshape: (-1x2x7x2x7x384xf32) <- (-1x14x14x384xf32, 6xi64) + reshape_124 = paddle._C_ops.reshape(reshape_123, stack_55) + del reshape_123, stack_55 + + # pd_op.transpose: (-1x2x2x7x7x384xf32) <- (-1x2x7x2x7x384xf32) + transpose_53 = paddle._C_ops.transpose(reshape_124, [0, 1, 3, 2, 4, 5]) + del reshape_124 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x2x2x7x7x384xf32, 4xi64) + reshape_125 = paddle._C_ops.reshape(transpose_53, full_int_array_38) + del transpose_53 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x7x7x384xf32, 3xi64) + reshape_126 = paddle._C_ops.reshape(reshape_125, full_int_array_39) + del reshape_125 + + # pd_op.shape64: (3xi64) <- (-1x49x384xf32) + shape64_38 = paddle._C_ops.shape64(reshape_126) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_62 = paddle._C_ops.slice( + shape64_38, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_38 + + # pd_op.matmul: (-1x49x1152xf32) <- (-1x49x384xf32, 384x1152xf32) + matmul_50 = paddle._C_ops.matmul(reshape_126, parameter_52, False, False) + del parameter_52, reshape_126 + + # pd_op.add: (-1x49x1152xf32) <- (-1x49x1152xf32, 1152xf32) + add_62 = paddle._C_ops.add(matmul_50, parameter_51) + del matmul_50, parameter_51 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_58 = [slice_62, full_4, full_5, full_31, full_6] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_56 = paddle._C_ops.stack(combine_58, 0) + del combine_58 + + # pd_op.reshape: (-1x49x3x12x32xf32) <- (-1x49x1152xf32, 5xi64) + reshape_127 = paddle._C_ops.reshape(add_62, stack_56) + del add_62, stack_56 + + # pd_op.transpose: (3x-1x12x49x32xf32) <- (-1x49x3x12x32xf32) + transpose_54 = paddle._C_ops.transpose(reshape_127, [2, 0, 3, 1, 4]) + del reshape_127 + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_63 = paddle._C_ops.slice( + transpose_54, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_64 = paddle._C_ops.slice( + transpose_54, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_65 = paddle._C_ops.slice( + transpose_54, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_54 + + # pd_op.scale: (-1x12x49x32xf32) <- (-1x12x49x32xf32, 1xf32) + scale_8 = paddle._C_ops.scale(slice_63, full_7, float("0"), True) + del slice_63 + + # pd_op.transpose: (-1x12x32x49xf32) <- (-1x12x49x32xf32) + transpose_55 = paddle._C_ops.transpose(slice_64, [0, 1, 3, 2]) + del slice_64 + + # pd_op.matmul: (-1x12x49x49xf32) <- (-1x12x49x32xf32, -1x12x32x49xf32) + matmul_51 = paddle._C_ops.matmul(scale_8, transpose_55, False, False) + del scale_8, transpose_55 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_128 = paddle._C_ops.reshape(data_17, full_int_array_7) + del data_17 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_8 = paddle._C_ops.index_select(data_18, reshape_128, 0) + del data_18, reshape_128 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_129 = paddle._C_ops.reshape(index_select_8, full_int_array_8) + del index_select_8 + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_56 = paddle._C_ops.transpose(reshape_129, [2, 0, 1]) + del reshape_129 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_24 = paddle._C_ops.unsqueeze(transpose_56, full_int_array_0) + del transpose_56 + + # pd_op.add: (-1x12x49x49xf32) <- (-1x12x49x49xf32, 1x12x49x49xf32) + add_63 = paddle._C_ops.add(matmul_51, unsqueeze_24) + del matmul_51, unsqueeze_24 + + # pd_op.softmax: (-1x12x49x49xf32) <- (-1x12x49x49xf32) + softmax_8 = paddle._C_ops.softmax(add_63, -1) + del add_63 + + # pd_op.matmul: (-1x12x49x32xf32) <- (-1x12x49x49xf32, -1x12x49x32xf32) + matmul_52 = paddle._C_ops.matmul(softmax_8, slice_65, False, False) + del slice_65, softmax_8 + + # pd_op.transpose: (-1x49x12x32xf32) <- (-1x12x49x32xf32) + transpose_57 = paddle._C_ops.transpose(matmul_52, [0, 2, 1, 3]) + del matmul_52 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_59 = [slice_62, full_4, full_17] + del slice_62 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_57 = paddle._C_ops.stack(combine_59, 0) + del combine_59 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x49x12x32xf32, 3xi64) + reshape_130 = paddle._C_ops.reshape(transpose_57, stack_57) + del stack_57, transpose_57 + + # pd_op.matmul: (-1x49x384xf32) <- (-1x49x384xf32, 384x384xf32) + matmul_53 = paddle._C_ops.matmul(reshape_130, parameter_50, False, False) + del parameter_50, reshape_130 + + # pd_op.add: (-1x49x384xf32) <- (-1x49x384xf32, 384xf32) + add_64 = paddle._C_ops.add(matmul_53, parameter_49) + del matmul_53, parameter_49 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x49x384xf32, 4xi64) + reshape_131 = paddle._C_ops.reshape(add_64, full_int_array_38) + del add_64 + + # pd_op.reshape: (-1x2x2x7x7x384xf32) <- (-1x7x7x384xf32, 6xi64) + reshape_132 = paddle._C_ops.reshape(reshape_131, full_int_array_40) + del reshape_131 + + # pd_op.transpose: (-1x2x7x2x7x384xf32) <- (-1x2x2x7x7x384xf32) + transpose_58 = paddle._C_ops.transpose(reshape_132, [0, 1, 3, 2, 4, 5]) + del reshape_132 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x2x7x2x7x384xf32, 4xi64) + reshape_133 = paddle._C_ops.reshape(transpose_58, full_int_array_41) + del transpose_58 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_60 = [slice_60, full_32, full_17] + del slice_60 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_58 = paddle._C_ops.stack(combine_60, 0) + del combine_60 + + # pd_op.reshape: (-1x196x384xf32) <- (-1x14x14x384xf32, 3xi64) + reshape_134 = paddle._C_ops.reshape(reshape_133, stack_58) + del reshape_133, stack_58 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_65 = paddle._C_ops.add(add_61, reshape_134) + del add_61, reshape_134 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_60, layer_norm_61, layer_norm_62 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_65, parameter_48, parameter_47, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_47, parameter_48 + + # pd_op.matmul: (-1x196x1536xf32) <- (-1x196x384xf32, 384x1536xf32) + matmul_54 = paddle._C_ops.matmul(layer_norm_60, parameter_46, False, False) + del layer_norm_60, parameter_46 + + # pd_op.add: (-1x196x1536xf32) <- (-1x196x1536xf32, 1536xf32) + add_66 = paddle._C_ops.add(matmul_54, parameter_45) + del matmul_54, parameter_45 + + # pd_op.gelu: (-1x196x1536xf32) <- (-1x196x1536xf32) + gelu_8 = paddle._C_ops.gelu(add_66, False) + del add_66 + + # pd_op.matmul: (-1x196x384xf32) <- (-1x196x1536xf32, 1536x384xf32) + matmul_55 = paddle._C_ops.matmul(gelu_8, parameter_44, False, False) + del gelu_8, parameter_44 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, 384xf32) + add_67 = paddle._C_ops.add(matmul_55, parameter_43) + del matmul_55, parameter_43 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_68 = paddle._C_ops.add(add_65, add_67) + del add_65, add_67 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_39 = paddle._C_ops.shape64(add_68) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_66 = paddle._C_ops.slice( + shape64_39, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_39 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_63, layer_norm_64, layer_norm_65 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_68, parameter_42, parameter_41, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_41, parameter_42 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_61 = [slice_66, full_29, full_29, full_17] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_59 = paddle._C_ops.stack(combine_61, 0) + del combine_61 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x196x384xf32, 4xi64) + reshape_135 = paddle._C_ops.reshape(layer_norm_63, stack_59) + del layer_norm_63, stack_59 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_40 = paddle._C_ops.shape64(reshape_135) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_67 = paddle._C_ops.slice( + shape64_40, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_40 + + # pd_op.roll: (-1x14x14x384xf32) <- (-1x14x14x384xf32, 2xi64) + roll_8 = paddle._C_ops.roll(reshape_135, full_int_array_11, [1, 2]) + del reshape_135 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_41 = paddle._C_ops.shape64(roll_8) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_68 = paddle._C_ops.slice( + shape64_41, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_41 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_62 = [slice_68, full_30, full_3, full_30, full_3, full_17] + del full_30, slice_68 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_60 = paddle._C_ops.stack(combine_62, 0) + del combine_62 + + # pd_op.reshape: (-1x2x7x2x7x384xf32) <- (-1x14x14x384xf32, 6xi64) + reshape_136 = paddle._C_ops.reshape(roll_8, stack_60) + del roll_8, stack_60 + + # pd_op.transpose: (-1x2x2x7x7x384xf32) <- (-1x2x7x2x7x384xf32) + transpose_59 = paddle._C_ops.transpose(reshape_136, [0, 1, 3, 2, 4, 5]) + del reshape_136 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x2x2x7x7x384xf32, 4xi64) + reshape_137 = paddle._C_ops.reshape(transpose_59, full_int_array_38) + del transpose_59 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x7x7x384xf32, 3xi64) + reshape_138 = paddle._C_ops.reshape(reshape_137, full_int_array_39) + del full_int_array_39, reshape_137 + + # pd_op.full: (1x14x14x1xf32) <- () + full_38 = paddle._C_ops.full( + [1, 14, 14, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__38 = paddle._C_ops.set_value_( + full_38, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_38 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__39 = paddle._C_ops.set_value_( + set_value__38, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del set_value__38 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__40 = paddle._C_ops.set_value_( + set_value__39, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del set_value__39 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__41 = paddle._C_ops.set_value_( + set_value__40, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del set_value__40 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__42 = paddle._C_ops.set_value_( + set_value__41, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del set_value__41 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__43 = paddle._C_ops.set_value_( + set_value__42, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del set_value__42 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__44 = paddle._C_ops.set_value_( + set_value__43, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del set_value__43 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__45 = paddle._C_ops.set_value_( + set_value__44, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del set_value__44 + + # pd_op.set_value_: (1x14x14x1xf32) <- (1x14x14x1xf32, 2xi64, 2xi64, 2xi64) + set_value__4 = paddle._C_ops.set_value_( + set_value__45, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del set_value__45 + + # pd_op.reshape: (1x2x7x2x7x1xf32) <- (1x14x14x1xf32, 6xi64) + reshape_139 = paddle._C_ops.reshape(set_value__4, full_int_array_42) + del full_int_array_42 + + # pd_op.transpose: (1x2x2x7x7x1xf32) <- (1x2x7x2x7x1xf32) + transpose_60 = paddle._C_ops.transpose(reshape_139, [0, 1, 3, 2, 4, 5]) + del reshape_139 + + # pd_op.reshape: (4x7x7x1xf32) <- (1x2x2x7x7x1xf32, 4xi64) + reshape_140 = paddle._C_ops.reshape(transpose_60, full_int_array_27) + del transpose_60 + + # pd_op.reshape: (4x49xf32) <- (4x7x7x1xf32, 2xi64) + reshape_141 = paddle._C_ops.reshape(reshape_140, full_int_array_28) + del reshape_140 + + # pd_op.unsqueeze: (4x1x49xf32) <- (4x49xf32, 1xi64) + unsqueeze_25 = paddle._C_ops.unsqueeze(reshape_141, full_int_array_1) + + # pd_op.unsqueeze: (4x49x1xf32) <- (4x49xf32, 1xi64) + unsqueeze_26 = paddle._C_ops.unsqueeze(reshape_141, full_int_array_5) + del reshape_141 + + # pd_op.subtract: (4x49x49xf32) <- (4x1x49xf32, 4x49x1xf32) + subtract_4 = paddle._C_ops.subtract(unsqueeze_25, unsqueeze_26) + del unsqueeze_25, unsqueeze_26 + + # pd_op.not_equal: (4x49x49xb) <- (4x49x49xf32, xf32) + not_equal_4 = paddle._C_ops.not_equal(subtract_4, full_10) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_8 = paddle._C_ops.where(not_equal_4, full_34, subtract_4) + del full_34, not_equal_4, subtract_4 + + # pd_op.equal: (4x49x49xb) <- (4x49x49xf32, xf32) + equal_4 = paddle._C_ops.equal(where_8, full_10) + + # pd_op.where: (4x49x49xf32) <- (4x49x49xb, 4x49x49xf32, 4x49x49xf32) + where_9 = paddle._C_ops.where(equal_4, full_35, where_8) + del equal_4, full_35, where_8 + + # pd_op.shape64: (3xi64) <- (-1x49x384xf32) + shape64_42 = paddle._C_ops.shape64(reshape_138) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_69 = paddle._C_ops.slice( + shape64_42, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_42 + + # pd_op.matmul: (-1x49x1152xf32) <- (-1x49x384xf32, 384x1152xf32) + matmul_56 = paddle._C_ops.matmul(reshape_138, parameter_40, False, False) + del parameter_40, reshape_138 + + # pd_op.add: (-1x49x1152xf32) <- (-1x49x1152xf32, 1152xf32) + add_69 = paddle._C_ops.add(matmul_56, parameter_39) + del matmul_56, parameter_39 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_63 = [slice_69, full_4, full_5, full_31, full_6] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_61 = paddle._C_ops.stack(combine_63, 0) + del combine_63 + + # pd_op.reshape: (-1x49x3x12x32xf32) <- (-1x49x1152xf32, 5xi64) + reshape_142 = paddle._C_ops.reshape(add_69, stack_61) + del add_69, stack_61 + + # pd_op.transpose: (3x-1x12x49x32xf32) <- (-1x49x3x12x32xf32) + transpose_61 = paddle._C_ops.transpose(reshape_142, [2, 0, 3, 1, 4]) + del reshape_142 + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_70 = paddle._C_ops.slice( + transpose_61, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_71 = paddle._C_ops.slice( + transpose_61, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x12x49x32xf32) <- (3x-1x12x49x32xf32, 1xi64, 1xi64) + slice_72 = paddle._C_ops.slice( + transpose_61, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_61 + + # pd_op.scale: (-1x12x49x32xf32) <- (-1x12x49x32xf32, 1xf32) + scale_9 = paddle._C_ops.scale(slice_70, full_7, float("0"), True) + del slice_70 + + # pd_op.transpose: (-1x12x32x49xf32) <- (-1x12x49x32xf32) + transpose_62 = paddle._C_ops.transpose(slice_71, [0, 1, 3, 2]) + del slice_71 + + # pd_op.matmul: (-1x12x49x49xf32) <- (-1x12x49x32xf32, -1x12x32x49xf32) + matmul_57 = paddle._C_ops.matmul(scale_9, transpose_62, False, False) + del scale_9, transpose_62 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_143 = paddle._C_ops.reshape(data_19, full_int_array_7) + del data_19 + + # pd_op.index_select: (2401x12xf32) <- (169x12xf32, 2401xi64) + index_select_9 = paddle._C_ops.index_select(data_20, reshape_143, 0) + del data_20, reshape_143 + + # pd_op.reshape: (49x49x12xf32) <- (2401x12xf32, 3xi64) + reshape_144 = paddle._C_ops.reshape(index_select_9, full_int_array_8) + del index_select_9 + + # pd_op.transpose: (12x49x49xf32) <- (49x49x12xf32) + transpose_63 = paddle._C_ops.transpose(reshape_144, [2, 0, 1]) + del reshape_144 + + # pd_op.unsqueeze: (1x12x49x49xf32) <- (12x49x49xf32, 1xi64) + unsqueeze_27 = paddle._C_ops.unsqueeze(transpose_63, full_int_array_0) + del transpose_63 + + # pd_op.add: (-1x12x49x49xf32) <- (-1x12x49x49xf32, 1x12x49x49xf32) + add_70 = paddle._C_ops.add(matmul_57, unsqueeze_27) + del matmul_57, unsqueeze_27 + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_4 = paddle._C_ops.floor_divide(slice_69, full_36) + del full_36 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_64 = [floor_divide_4, full_20, full_31, full_4, full_4] + del floor_divide_4, full_20 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_62 = paddle._C_ops.stack(combine_64, 0) + del combine_64 + + # pd_op.reshape: (-1x4x12x49x49xf32) <- (-1x12x49x49xf32, 5xi64) + reshape_145 = paddle._C_ops.reshape(add_70, stack_62) + del add_70, stack_62 + + # pd_op.unsqueeze: (4x1x49x49xf32) <- (4x49x49xf32, 1xi64) + unsqueeze_28 = paddle._C_ops.unsqueeze(where_9, full_int_array_1) + del where_9 + + # pd_op.unsqueeze: (1x4x1x49x49xf32) <- (4x1x49x49xf32, 1xi64) + unsqueeze_29 = paddle._C_ops.unsqueeze(unsqueeze_28, full_int_array_0) + del unsqueeze_28 + + # pd_op.add: (-1x4x12x49x49xf32) <- (-1x4x12x49x49xf32, 1x4x1x49x49xf32) + add_71 = paddle._C_ops.add(reshape_145, unsqueeze_29) + del reshape_145, unsqueeze_29 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_65 = [slice_69, full_31, full_4, full_4] + del full_31 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_63 = paddle._C_ops.stack(combine_65, 0) + del combine_65 + + # pd_op.reshape: (-1x12x49x49xf32) <- (-1x4x12x49x49xf32, 4xi64) + reshape_146 = paddle._C_ops.reshape(add_71, stack_63) + del add_71, stack_63 + + # pd_op.softmax: (-1x12x49x49xf32) <- (-1x12x49x49xf32) + softmax_9 = paddle._C_ops.softmax(reshape_146, -1) + del reshape_146 + + # pd_op.matmul: (-1x12x49x32xf32) <- (-1x12x49x49xf32, -1x12x49x32xf32) + matmul_58 = paddle._C_ops.matmul(softmax_9, slice_72, False, False) + del slice_72, softmax_9 + + # pd_op.transpose: (-1x49x12x32xf32) <- (-1x12x49x32xf32) + transpose_64 = paddle._C_ops.transpose(matmul_58, [0, 2, 1, 3]) + del matmul_58 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_66 = [slice_69, full_4, full_17] + del slice_69 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_64 = paddle._C_ops.stack(combine_66, 0) + del combine_66 + + # pd_op.reshape: (-1x49x384xf32) <- (-1x49x12x32xf32, 3xi64) + reshape_147 = paddle._C_ops.reshape(transpose_64, stack_64) + del stack_64, transpose_64 + + # pd_op.matmul: (-1x49x384xf32) <- (-1x49x384xf32, 384x384xf32) + matmul_59 = paddle._C_ops.matmul(reshape_147, parameter_38, False, False) + del parameter_38, reshape_147 + + # pd_op.add: (-1x49x384xf32) <- (-1x49x384xf32, 384xf32) + add_72 = paddle._C_ops.add(matmul_59, parameter_37) + del matmul_59, parameter_37 + + # pd_op.reshape: (-1x7x7x384xf32) <- (-1x49x384xf32, 4xi64) + reshape_148 = paddle._C_ops.reshape(add_72, full_int_array_38) + del add_72, full_int_array_38 + + # pd_op.reshape: (-1x2x2x7x7x384xf32) <- (-1x7x7x384xf32, 6xi64) + reshape_149 = paddle._C_ops.reshape(reshape_148, full_int_array_40) + del full_int_array_40, reshape_148 + + # pd_op.transpose: (-1x2x7x2x7x384xf32) <- (-1x2x2x7x7x384xf32) + transpose_65 = paddle._C_ops.transpose(reshape_149, [0, 1, 3, 2, 4, 5]) + del reshape_149 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x2x7x2x7x384xf32, 4xi64) + reshape_150 = paddle._C_ops.reshape(transpose_65, full_int_array_41) + del full_int_array_41, transpose_65 + + # pd_op.roll: (-1x14x14x384xf32) <- (-1x14x14x384xf32, 2xi64) + roll_9 = paddle._C_ops.roll(reshape_150, full_int_array_29, [1, 2]) + del reshape_150 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_67 = [slice_66, full_32, full_17] + del full_32, slice_66 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_65 = paddle._C_ops.stack(combine_67, 0) + del combine_67 + + # pd_op.reshape: (-1x196x384xf32) <- (-1x14x14x384xf32, 3xi64) + reshape_151 = paddle._C_ops.reshape(roll_9, stack_65) + del roll_9, stack_65 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_73 = paddle._C_ops.add(add_68, reshape_151) + del add_68, reshape_151 + + # pd_op.layer_norm: (-1x196x384xf32, -1x196xf32, -1x196xf32) <- (-1x196x384xf32, 384xf32, 384xf32) + layer_norm_66, layer_norm_67, layer_norm_68 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_73, parameter_36, parameter_35, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_35, parameter_36 + + # pd_op.matmul: (-1x196x1536xf32) <- (-1x196x384xf32, 384x1536xf32) + matmul_60 = paddle._C_ops.matmul(layer_norm_66, parameter_34, False, False) + del layer_norm_66, parameter_34 + + # pd_op.add: (-1x196x1536xf32) <- (-1x196x1536xf32, 1536xf32) + add_74 = paddle._C_ops.add(matmul_60, parameter_33) + del matmul_60, parameter_33 + + # pd_op.gelu: (-1x196x1536xf32) <- (-1x196x1536xf32) + gelu_9 = paddle._C_ops.gelu(add_74, False) + del add_74 + + # pd_op.matmul: (-1x196x384xf32) <- (-1x196x1536xf32, 1536x384xf32) + matmul_61 = paddle._C_ops.matmul(gelu_9, parameter_32, False, False) + del gelu_9, parameter_32 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, 384xf32) + add_75 = paddle._C_ops.add(matmul_61, parameter_31) + del matmul_61, parameter_31 + + # pd_op.add: (-1x196x384xf32) <- (-1x196x384xf32, -1x196x384xf32) + add_76 = paddle._C_ops.add(add_73, add_75) + del add_73, add_75 + + # pd_op.shape64: (3xi64) <- (-1x196x384xf32) + shape64_43 = paddle._C_ops.shape64(add_76) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_73 = paddle._C_ops.slice( + shape64_43, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_43 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_68 = [slice_73, full_29, full_29, full_17] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_66 = paddle._C_ops.stack(combine_68, 0) + del combine_68 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x196x384xf32, 4xi64) + reshape_152 = paddle._C_ops.reshape(add_76, stack_66) + del add_76, stack_66 + + # pd_op.strided_slice: (-1x7x7x384xf32) <- (-1x14x14x384xf32, 2xi64, 2xi64, 2xi64) + strided_slice_8 = paddle._C_ops.strided_slice( + reshape_152, [1, 2], full_int_array_12, full_int_array_25, full_int_array_30 + ) + + # pd_op.strided_slice: (-1x7x7x384xf32) <- (-1x14x14x384xf32, 2xi64, 2xi64, 2xi64) + strided_slice_9 = paddle._C_ops.strided_slice( + reshape_152, [1, 2], full_int_array_31, full_int_array_25, full_int_array_30 + ) + del full_int_array_31 + + # pd_op.strided_slice: (-1x7x7x384xf32) <- (-1x14x14x384xf32, 2xi64, 2xi64, 2xi64) + strided_slice_10 = paddle._C_ops.strided_slice( + reshape_152, [1, 2], full_int_array_32, full_int_array_25, full_int_array_30 + ) + del full_int_array_32 + + # pd_op.strided_slice: (-1x7x7x384xf32) <- (-1x14x14x384xf32, 2xi64, 2xi64, 2xi64) + strided_slice_11 = paddle._C_ops.strided_slice( + reshape_152, [1, 2], full_int_array_14, full_int_array_25, full_int_array_30 + ) + del full_int_array_30 + + # pd_op.shape64: (4xi64) <- (-1x14x14x384xf32) + shape64_44 = paddle._C_ops.shape64(reshape_152) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_74 = paddle._C_ops.slice( + shape64_44, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_44 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_69 = [slice_74, full_29, full_29, full_17] + del full_17, full_29, slice_74 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_67 = paddle._C_ops.stack(combine_69, 0) + del combine_69 + + # pd_op.reshape: (-1x14x14x384xf32) <- (-1x14x14x384xf32, 4xi64) + reshape_153 = paddle._C_ops.reshape(reshape_152, stack_67) + del reshape_152, stack_67 + + # builtin.combine: ([-1x7x7x384xf32, -1x7x7x384xf32, -1x7x7x384xf32, -1x7x7x384xf32]) <- (-1x7x7x384xf32, -1x7x7x384xf32, -1x7x7x384xf32, -1x7x7x384xf32) + combine_70 = [ + strided_slice_8, + strided_slice_9, + strided_slice_10, + strided_slice_11, + ] + del strided_slice_10, strided_slice_11, strided_slice_8, strided_slice_9 + + # pd_op.concat: (-1x7x7x1536xf32) <- ([-1x7x7x384xf32, -1x7x7x384xf32, -1x7x7x384xf32, -1x7x7x384xf32], 1xi32) + concat_2 = paddle._C_ops.concat(combine_70, full_15) + del combine_70, full_15 + + # pd_op.full: (xi64) <- () + full_39 = paddle._C_ops.full( + [], float("1536"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_71 = [slice_73, full_16, full_39] + del full_16, full_39, slice_73 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_68 = paddle._C_ops.stack(combine_71, 0) + del combine_71 + + # pd_op.reshape: (-1x-1x1536xf32) <- (-1x7x7x1536xf32, 3xi64) + reshape_154 = paddle._C_ops.reshape(concat_2, stack_68) + del concat_2, stack_68 + + # pd_op.layer_norm: (-1x-1x1536xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x1536xf32, 1536xf32, 1536xf32) + layer_norm_69, layer_norm_70, layer_norm_71 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + reshape_154, parameter_30, parameter_29, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_29, parameter_30, reshape_154 + + # pd_op.matmul: (-1x-1x768xf32) <- (-1x-1x1536xf32, 1536x768xf32) + matmul_62 = paddle._C_ops.matmul(layer_norm_69, parameter_28, False, False) + del layer_norm_69, parameter_28 + + # pd_op.shape64: (3xi64) <- (-1x-1x768xf32) + shape64_45 = paddle._C_ops.shape64(matmul_62) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_75 = paddle._C_ops.slice( + shape64_45, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_45 + + # pd_op.shape64: (3xi64) <- (-1x-1x768xf32) + shape64_46 = paddle._C_ops.shape64(matmul_62) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_76 = paddle._C_ops.slice( + shape64_46, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + del shape64_46 + + # pd_op.layer_norm: (-1x-1x768xf32, -1x-1xf32, -1x-1xf32) <- (-1x-1x768xf32, 768xf32, 768xf32) + layer_norm_72, layer_norm_73, layer_norm_74 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + matmul_62, parameter_27, parameter_26, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_26, parameter_27 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_72 = [slice_75, full_3, full_3, full_28] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_69 = paddle._C_ops.stack(combine_72, 0) + del combine_72 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x-1x768xf32, 4xi64) + reshape_155 = paddle._C_ops.reshape(layer_norm_72, stack_69) + del layer_norm_72, stack_69 + + # pd_op.shape64: (4xi64) <- (-1x7x7x768xf32) + shape64_47 = paddle._C_ops.shape64(reshape_155) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_77 = paddle._C_ops.slice( + shape64_47, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_47 + + # pd_op.full: (xi64) <- () + full_40 = paddle._C_ops.full( + [], float("1"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_73 = [slice_77, full_40, full_3, full_40, full_3, full_28] + del slice_77 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_70 = paddle._C_ops.stack(combine_73, 0) + del combine_73 + + # pd_op.reshape: (-1x1x7x1x7x768xf32) <- (-1x7x7x768xf32, 6xi64) + reshape_156 = paddle._C_ops.reshape(reshape_155, stack_70) + del reshape_155, stack_70 + + # pd_op.transpose: (-1x1x1x7x7x768xf32) <- (-1x1x7x1x7x768xf32) + transpose_66 = paddle._C_ops.transpose(reshape_156, [0, 1, 3, 2, 4, 5]) + del reshape_156 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_43 = [-1, 7, 7, 768] + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x1x1x7x7x768xf32, 4xi64) + reshape_157 = paddle._C_ops.reshape(transpose_66, full_int_array_43) + del transpose_66 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_44 = [-1, 49, 768] + + # pd_op.reshape: (-1x49x768xf32) <- (-1x7x7x768xf32, 3xi64) + reshape_158 = paddle._C_ops.reshape(reshape_157, full_int_array_44) + del reshape_157 + + # pd_op.shape64: (3xi64) <- (-1x49x768xf32) + shape64_48 = paddle._C_ops.shape64(reshape_158) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_78 = paddle._C_ops.slice( + shape64_48, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_48 + + # pd_op.matmul: (-1x49x2304xf32) <- (-1x49x768xf32, 768x2304xf32) + matmul_63 = paddle._C_ops.matmul(reshape_158, parameter_25, False, False) + del parameter_25, reshape_158 + + # pd_op.add: (-1x49x2304xf32) <- (-1x49x2304xf32, 2304xf32) + add_77 = paddle._C_ops.add(matmul_63, parameter_24) + del matmul_63, parameter_24 + + # pd_op.full: (xi64) <- () + full_41 = paddle._C_ops.full( + [], float("24"), paddle.int64, paddle.core.CPUPlace() + ) + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_74 = [slice_78, full_4, full_5, full_41, full_6] + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_71 = paddle._C_ops.stack(combine_74, 0) + del combine_74 + + # pd_op.reshape: (-1x49x3x24x32xf32) <- (-1x49x2304xf32, 5xi64) + reshape_159 = paddle._C_ops.reshape(add_77, stack_71) + del add_77, stack_71 + + # pd_op.transpose: (3x-1x24x49x32xf32) <- (-1x49x3x24x32xf32) + transpose_67 = paddle._C_ops.transpose(reshape_159, [2, 0, 3, 1, 4]) + del reshape_159 + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_79 = paddle._C_ops.slice( + transpose_67, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_80 = paddle._C_ops.slice( + transpose_67, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_81 = paddle._C_ops.slice( + transpose_67, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del transpose_67 + + # pd_op.scale: (-1x24x49x32xf32) <- (-1x24x49x32xf32, 1xf32) + scale_10 = paddle._C_ops.scale(slice_79, full_7, float("0"), True) + del slice_79 + + # pd_op.transpose: (-1x24x32x49xf32) <- (-1x24x49x32xf32) + transpose_68 = paddle._C_ops.transpose(slice_80, [0, 1, 3, 2]) + del slice_80 + + # pd_op.matmul: (-1x24x49x49xf32) <- (-1x24x49x32xf32, -1x24x32x49xf32) + matmul_64 = paddle._C_ops.matmul(scale_10, transpose_68, False, False) + del scale_10, transpose_68 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_160 = paddle._C_ops.reshape(data_21, full_int_array_7) + del data_21 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_10 = paddle._C_ops.index_select(data_22, reshape_160, 0) + del data_22, reshape_160 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_161 = paddle._C_ops.reshape(index_select_10, full_int_array_8) + del index_select_10 + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_69 = paddle._C_ops.transpose(reshape_161, [2, 0, 1]) + del reshape_161 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_30 = paddle._C_ops.unsqueeze(transpose_69, full_int_array_0) + del transpose_69 + + # pd_op.add: (-1x24x49x49xf32) <- (-1x24x49x49xf32, 1x24x49x49xf32) + add_78 = paddle._C_ops.add(matmul_64, unsqueeze_30) + del matmul_64, unsqueeze_30 + + # pd_op.softmax: (-1x24x49x49xf32) <- (-1x24x49x49xf32) + softmax_10 = paddle._C_ops.softmax(add_78, -1) + del add_78 + + # pd_op.matmul: (-1x24x49x32xf32) <- (-1x24x49x49xf32, -1x24x49x32xf32) + matmul_65 = paddle._C_ops.matmul(softmax_10, slice_81, False, False) + del slice_81, softmax_10 + + # pd_op.transpose: (-1x49x24x32xf32) <- (-1x24x49x32xf32) + transpose_70 = paddle._C_ops.transpose(matmul_65, [0, 2, 1, 3]) + del matmul_65 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_75 = [slice_78, full_4, full_28] + del slice_78 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_72 = paddle._C_ops.stack(combine_75, 0) + del combine_75 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x49x24x32xf32, 3xi64) + reshape_162 = paddle._C_ops.reshape(transpose_70, stack_72) + del stack_72, transpose_70 + + # pd_op.matmul: (-1x49x768xf32) <- (-1x49x768xf32, 768x768xf32) + matmul_66 = paddle._C_ops.matmul(reshape_162, parameter_23, False, False) + del parameter_23, reshape_162 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, 768xf32) + add_79 = paddle._C_ops.add(matmul_66, parameter_22) + del matmul_66, parameter_22 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x49x768xf32, 4xi64) + reshape_163 = paddle._C_ops.reshape(add_79, full_int_array_43) + del add_79 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_45 = [-1, 1, 1, 7, 7, 768] + + # pd_op.reshape: (-1x1x1x7x7x768xf32) <- (-1x7x7x768xf32, 6xi64) + reshape_164 = paddle._C_ops.reshape(reshape_163, full_int_array_45) + del reshape_163 + + # pd_op.transpose: (-1x1x7x1x7x768xf32) <- (-1x1x1x7x7x768xf32) + transpose_71 = paddle._C_ops.transpose(reshape_164, [0, 1, 3, 2, 4, 5]) + del reshape_164 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x1x7x1x7x768xf32, 4xi64) + reshape_165 = paddle._C_ops.reshape(transpose_71, full_int_array_43) + del transpose_71 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_76 = [slice_75, full_4, full_28] + del slice_75 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_73 = paddle._C_ops.stack(combine_76, 0) + del combine_76 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x7x7x768xf32, 3xi64) + reshape_166 = paddle._C_ops.reshape(reshape_165, stack_73) + del reshape_165, stack_73 + + # pd_op.add: (-1x49x768xf32) <- (-1x-1x768xf32, -1x49x768xf32) + add_80 = paddle._C_ops.add(matmul_62, reshape_166) + del matmul_62, reshape_166 + + # pd_op.layer_norm: (-1x49x768xf32, -1x49xf32, -1x49xf32) <- (-1x49x768xf32, 768xf32, 768xf32) + layer_norm_75, layer_norm_76, layer_norm_77 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_80, parameter_21, parameter_20, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_20, parameter_21 + + # pd_op.matmul: (-1x49x3072xf32) <- (-1x49x768xf32, 768x3072xf32) + matmul_67 = paddle._C_ops.matmul(layer_norm_75, parameter_19, False, False) + del layer_norm_75, parameter_19 + + # pd_op.add: (-1x49x3072xf32) <- (-1x49x3072xf32, 3072xf32) + add_81 = paddle._C_ops.add(matmul_67, parameter_18) + del matmul_67, parameter_18 + + # pd_op.gelu: (-1x49x3072xf32) <- (-1x49x3072xf32) + gelu_10 = paddle._C_ops.gelu(add_81, False) + del add_81 + + # pd_op.matmul: (-1x49x768xf32) <- (-1x49x3072xf32, 3072x768xf32) + matmul_68 = paddle._C_ops.matmul(gelu_10, parameter_17, False, False) + del gelu_10, parameter_17 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, 768xf32) + add_82 = paddle._C_ops.add(matmul_68, parameter_16) + del matmul_68, parameter_16 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, -1x49x768xf32) + add_83 = paddle._C_ops.add(add_80, add_82) + del add_80, add_82 + + # pd_op.shape64: (3xi64) <- (-1x49x768xf32) + shape64_49 = paddle._C_ops.shape64(add_83) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_82 = paddle._C_ops.slice( + shape64_49, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_49 + + # pd_op.layer_norm: (-1x49x768xf32, -1x49xf32, -1x49xf32) <- (-1x49x768xf32, 768xf32, 768xf32) + layer_norm_78, layer_norm_79, layer_norm_80 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_83, parameter_15, parameter_14, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_14, parameter_15 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_77 = [slice_82, full_3, full_3, full_28] + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_74 = paddle._C_ops.stack(combine_77, 0) + del combine_77 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x49x768xf32, 4xi64) + reshape_167 = paddle._C_ops.reshape(layer_norm_78, stack_74) + del layer_norm_78, stack_74 + + # pd_op.shape64: (4xi64) <- (-1x7x7x768xf32) + shape64_50 = paddle._C_ops.shape64(reshape_167) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_83 = paddle._C_ops.slice( + shape64_50, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_50 + + # pd_op.roll: (-1x7x7x768xf32) <- (-1x7x7x768xf32, 2xi64) + roll_10 = paddle._C_ops.roll(reshape_167, full_int_array_11, [1, 2]) + del reshape_167 + + # pd_op.shape64: (4xi64) <- (-1x7x7x768xf32) + shape64_51 = paddle._C_ops.shape64(roll_10) + + # pd_op.slice: (xi64) <- (4xi64, 1xi64, 1xi64) + slice_84 = paddle._C_ops.slice( + shape64_51, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_51 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64, xi64) + combine_78 = [slice_84, full_40, full_3, full_40, full_3, full_28] + del full_3, slice_84 + + # pd_op.stack: (6xi64) <- ([xi64, xi64, xi64, xi64, xi64, xi64]) + stack_75 = paddle._C_ops.stack(combine_78, 0) + del combine_78 + + # pd_op.reshape: (-1x1x7x1x7x768xf32) <- (-1x7x7x768xf32, 6xi64) + reshape_168 = paddle._C_ops.reshape(roll_10, stack_75) + del roll_10, stack_75 + + # pd_op.transpose: (-1x1x1x7x7x768xf32) <- (-1x1x7x1x7x768xf32) + transpose_72 = paddle._C_ops.transpose(reshape_168, [0, 1, 3, 2, 4, 5]) + del reshape_168 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x1x1x7x7x768xf32, 4xi64) + reshape_169 = paddle._C_ops.reshape(transpose_72, full_int_array_43) + del transpose_72 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x7x7x768xf32, 3xi64) + reshape_170 = paddle._C_ops.reshape(reshape_169, full_int_array_44) + del full_int_array_44, reshape_169 + + # pd_op.full: (1x7x7x1xf32) <- () + full_42 = paddle._C_ops.full( + [1, 7, 7, 1], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__46 = paddle._C_ops.set_value_( + full_42, + full_int_array_12, + full_int_array_13, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("0")], + ) + del full_42, full_int_array_12 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__47 = paddle._C_ops.set_value_( + set_value__46, + full_int_array_15, + full_int_array_16, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("1")], + ) + del full_int_array_15, set_value__46 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__48 = paddle._C_ops.set_value_( + set_value__47, + full_int_array_17, + full_int_array_18, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("2")], + ) + del full_int_array_17, full_int_array_18, set_value__47 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__49 = paddle._C_ops.set_value_( + set_value__48, + full_int_array_19, + full_int_array_20, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("3")], + ) + del full_int_array_19, set_value__48 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__50 = paddle._C_ops.set_value_( + set_value__49, + full_int_array_13, + full_int_array_11, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("4")], + ) + del full_int_array_13, set_value__49 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__51 = paddle._C_ops.set_value_( + set_value__50, + full_int_array_16, + full_int_array_21, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("5")], + ) + del full_int_array_16, full_int_array_21, set_value__50 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__52 = paddle._C_ops.set_value_( + set_value__51, + full_int_array_22, + full_int_array_23, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("6")], + ) + del full_int_array_22, full_int_array_23, set_value__51 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__53 = paddle._C_ops.set_value_( + set_value__52, + full_int_array_20, + full_int_array_24, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("7")], + ) + del full_int_array_20, full_int_array_24, set_value__52 + + # pd_op.set_value_: (1x7x7x1xf32) <- (1x7x7x1xf32, 2xi64, 2xi64, 2xi64) + set_value__5 = paddle._C_ops.set_value_( + set_value__53, + full_int_array_11, + full_int_array_25, + full_int_array_14, + [1, 2], + [], + [], + [1], + [float("8")], + ) + del full_int_array_11, full_int_array_25, set_value__53 + + # pd_op.full_int_array: (6xi64) <- () + full_int_array_46 = [1, 1, 7, 1, 7, 1] + + # pd_op.reshape: (1x1x7x1x7x1xf32) <- (1x7x7x1xf32, 6xi64) + reshape_171 = paddle._C_ops.reshape(set_value__5, full_int_array_46) + del full_int_array_46 + + # pd_op.transpose: (1x1x1x7x7x1xf32) <- (1x1x7x1x7x1xf32) + transpose_73 = paddle._C_ops.transpose(reshape_171, [0, 1, 3, 2, 4, 5]) + del reshape_171 + + # pd_op.reshape: (1x7x7x1xf32) <- (1x1x1x7x7x1xf32, 4xi64) + reshape_172 = paddle._C_ops.reshape(transpose_73, full_int_array_27) + del full_int_array_27, transpose_73 + + # pd_op.reshape: (1x49xf32) <- (1x7x7x1xf32, 2xi64) + reshape_173 = paddle._C_ops.reshape(reshape_172, full_int_array_28) + del full_int_array_28, reshape_172 + + # pd_op.unsqueeze: (1x1x49xf32) <- (1x49xf32, 1xi64) + unsqueeze_31 = paddle._C_ops.unsqueeze(reshape_173, full_int_array_1) + + # pd_op.unsqueeze: (1x49x1xf32) <- (1x49xf32, 1xi64) + unsqueeze_32 = paddle._C_ops.unsqueeze(reshape_173, full_int_array_5) + del reshape_173 + + # pd_op.subtract: (1x49x49xf32) <- (1x1x49xf32, 1x49x1xf32) + subtract_5 = paddle._C_ops.subtract(unsqueeze_31, unsqueeze_32) + del unsqueeze_31, unsqueeze_32 + + # pd_op.not_equal: (1x49x49xb) <- (1x49x49xf32, xf32) + not_equal_5 = paddle._C_ops.not_equal(subtract_5, full_10) + + # pd_op.full: (1x49x49xf32) <- () + full_43 = paddle._C_ops.full( + [1, 49, 49], + float("-100"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (1x49x49xf32) <- (1x49x49xb, 1x49x49xf32, 1x49x49xf32) + where_10 = paddle._C_ops.where(not_equal_5, full_43, subtract_5) + del full_43, not_equal_5, subtract_5 + + # pd_op.equal: (1x49x49xb) <- (1x49x49xf32, xf32) + equal_5 = paddle._C_ops.equal(where_10, full_10) + del full_10 + + # pd_op.full: (1x49x49xf32) <- () + full_44 = paddle._C_ops.full( + [1, 49, 49], + float("0"), + paddle.float32, + paddle.framework._current_expected_place(), + ) + + # pd_op.where: (1x49x49xf32) <- (1x49x49xb, 1x49x49xf32, 1x49x49xf32) + where_11 = paddle._C_ops.where(equal_5, full_44, where_10) + del equal_5, full_44, where_10 + + # pd_op.shape64: (3xi64) <- (-1x49x768xf32) + shape64_52 = paddle._C_ops.shape64(reshape_170) + + # pd_op.slice: (xi64) <- (3xi64, 1xi64, 1xi64) + slice_85 = paddle._C_ops.slice( + shape64_52, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + del shape64_52 + + # pd_op.matmul: (-1x49x2304xf32) <- (-1x49x768xf32, 768x2304xf32) + matmul_69 = paddle._C_ops.matmul(reshape_170, parameter_13, False, False) + del parameter_13, reshape_170 + + # pd_op.add: (-1x49x2304xf32) <- (-1x49x2304xf32, 2304xf32) + add_84 = paddle._C_ops.add(matmul_69, parameter_12) + del matmul_69, parameter_12 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_79 = [slice_85, full_4, full_5, full_41, full_6] + del full_5, full_6 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_76 = paddle._C_ops.stack(combine_79, 0) + del combine_79 + + # pd_op.reshape: (-1x49x3x24x32xf32) <- (-1x49x2304xf32, 5xi64) + reshape_174 = paddle._C_ops.reshape(add_84, stack_76) + del add_84, stack_76 + + # pd_op.transpose: (3x-1x24x49x32xf32) <- (-1x49x3x24x32xf32) + transpose_74 = paddle._C_ops.transpose(reshape_174, [2, 0, 3, 1, 4]) + del reshape_174 + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_86 = paddle._C_ops.slice( + transpose_74, [0], full_int_array_0, full_int_array_1, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_87 = paddle._C_ops.slice( + transpose_74, [0], full_int_array_1, full_int_array_5, [1], [0] + ) + + # pd_op.slice: (-1x24x49x32xf32) <- (3x-1x24x49x32xf32, 1xi64, 1xi64) + slice_88 = paddle._C_ops.slice( + transpose_74, [0], full_int_array_5, full_int_array_6, [1], [0] + ) + del full_int_array_6, transpose_74 + + # pd_op.scale: (-1x24x49x32xf32) <- (-1x24x49x32xf32, 1xf32) + scale_11 = paddle._C_ops.scale(slice_86, full_7, float("0"), True) + del full_7, slice_86 + + # pd_op.transpose: (-1x24x32x49xf32) <- (-1x24x49x32xf32) + transpose_75 = paddle._C_ops.transpose(slice_87, [0, 1, 3, 2]) + del slice_87 + + # pd_op.matmul: (-1x24x49x49xf32) <- (-1x24x49x32xf32, -1x24x32x49xf32) + matmul_70 = paddle._C_ops.matmul(scale_11, transpose_75, False, False) + del scale_11, transpose_75 + + # pd_op.reshape: (2401xi64) <- (49x49xi64, 1xi64) + reshape_175 = paddle._C_ops.reshape(data_23, full_int_array_7) + del data_23, full_int_array_7 + + # pd_op.index_select: (2401x24xf32) <- (169x24xf32, 2401xi64) + index_select_11 = paddle._C_ops.index_select(data_24, reshape_175, 0) + del data_24, reshape_175 + + # pd_op.reshape: (49x49x24xf32) <- (2401x24xf32, 3xi64) + reshape_176 = paddle._C_ops.reshape(index_select_11, full_int_array_8) + del full_int_array_8, index_select_11 + + # pd_op.transpose: (24x49x49xf32) <- (49x49x24xf32) + transpose_76 = paddle._C_ops.transpose(reshape_176, [2, 0, 1]) + del reshape_176 + + # pd_op.unsqueeze: (1x24x49x49xf32) <- (24x49x49xf32, 1xi64) + unsqueeze_33 = paddle._C_ops.unsqueeze(transpose_76, full_int_array_0) + del transpose_76 + + # pd_op.add: (-1x24x49x49xf32) <- (-1x24x49x49xf32, 1x24x49x49xf32) + add_85 = paddle._C_ops.add(matmul_70, unsqueeze_33) + del matmul_70, unsqueeze_33 + + # pd_op.full: (xi64) <- () + full_45 = paddle._C_ops.full( + [], float("1"), paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.floor_divide: (xi64) <- (xi64, xi64) + floor_divide_5 = paddle._C_ops.floor_divide(slice_85, full_45) + del full_45 + + # builtin.combine: ([xi64, xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64, xi64) + combine_80 = [floor_divide_5, full_40, full_41, full_4, full_4] + del floor_divide_5, full_40 + + # pd_op.stack: (5xi64) <- ([xi64, xi64, xi64, xi64, xi64]) + stack_77 = paddle._C_ops.stack(combine_80, 0) + del combine_80 + + # pd_op.reshape: (-1x1x24x49x49xf32) <- (-1x24x49x49xf32, 5xi64) + reshape_177 = paddle._C_ops.reshape(add_85, stack_77) + del add_85, stack_77 + + # pd_op.unsqueeze: (1x1x49x49xf32) <- (1x49x49xf32, 1xi64) + unsqueeze_34 = paddle._C_ops.unsqueeze(where_11, full_int_array_1) + del full_int_array_1, where_11 + + # pd_op.unsqueeze: (1x1x1x49x49xf32) <- (1x1x49x49xf32, 1xi64) + unsqueeze_35 = paddle._C_ops.unsqueeze(unsqueeze_34, full_int_array_0) + del full_int_array_0, unsqueeze_34 + + # pd_op.add: (-1x1x24x49x49xf32) <- (-1x1x24x49x49xf32, 1x1x1x49x49xf32) + add_86 = paddle._C_ops.add(reshape_177, unsqueeze_35) + del reshape_177, unsqueeze_35 + + # builtin.combine: ([xi64, xi64, xi64, xi64]) <- (xi64, xi64, xi64, xi64) + combine_81 = [slice_85, full_41, full_4, full_4] + del full_41 + + # pd_op.stack: (4xi64) <- ([xi64, xi64, xi64, xi64]) + stack_78 = paddle._C_ops.stack(combine_81, 0) + del combine_81 + + # pd_op.reshape: (-1x24x49x49xf32) <- (-1x1x24x49x49xf32, 4xi64) + reshape_178 = paddle._C_ops.reshape(add_86, stack_78) + del add_86, stack_78 + + # pd_op.softmax: (-1x24x49x49xf32) <- (-1x24x49x49xf32) + softmax_11 = paddle._C_ops.softmax(reshape_178, -1) + del reshape_178 + + # pd_op.matmul: (-1x24x49x32xf32) <- (-1x24x49x49xf32, -1x24x49x32xf32) + matmul_71 = paddle._C_ops.matmul(softmax_11, slice_88, False, False) + del slice_88, softmax_11 + + # pd_op.transpose: (-1x49x24x32xf32) <- (-1x24x49x32xf32) + transpose_77 = paddle._C_ops.transpose(matmul_71, [0, 2, 1, 3]) + del matmul_71 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_82 = [slice_85, full_4, full_28] + del slice_85 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_79 = paddle._C_ops.stack(combine_82, 0) + del combine_82 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x49x24x32xf32, 3xi64) + reshape_179 = paddle._C_ops.reshape(transpose_77, stack_79) + del stack_79, transpose_77 + + # pd_op.matmul: (-1x49x768xf32) <- (-1x49x768xf32, 768x768xf32) + matmul_72 = paddle._C_ops.matmul(reshape_179, parameter_11, False, False) + del parameter_11, reshape_179 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, 768xf32) + add_87 = paddle._C_ops.add(matmul_72, parameter_10) + del matmul_72, parameter_10 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x49x768xf32, 4xi64) + reshape_180 = paddle._C_ops.reshape(add_87, full_int_array_43) + del add_87 + + # pd_op.reshape: (-1x1x1x7x7x768xf32) <- (-1x7x7x768xf32, 6xi64) + reshape_181 = paddle._C_ops.reshape(reshape_180, full_int_array_45) + del full_int_array_45, reshape_180 + + # pd_op.transpose: (-1x1x7x1x7x768xf32) <- (-1x1x1x7x7x768xf32) + transpose_78 = paddle._C_ops.transpose(reshape_181, [0, 1, 3, 2, 4, 5]) + del reshape_181 + + # pd_op.reshape: (-1x7x7x768xf32) <- (-1x1x7x1x7x768xf32, 4xi64) + reshape_182 = paddle._C_ops.reshape(transpose_78, full_int_array_43) + del full_int_array_43, transpose_78 + + # pd_op.roll: (-1x7x7x768xf32) <- (-1x7x7x768xf32, 2xi64) + roll_11 = paddle._C_ops.roll(reshape_182, full_int_array_29, [1, 2]) + del full_int_array_29, reshape_182 + + # builtin.combine: ([xi64, xi64, xi64]) <- (xi64, xi64, xi64) + combine_83 = [slice_82, full_4, full_28] + del full_28, full_4, slice_82 + + # pd_op.stack: (3xi64) <- ([xi64, xi64, xi64]) + stack_80 = paddle._C_ops.stack(combine_83, 0) + del combine_83 + + # pd_op.reshape: (-1x49x768xf32) <- (-1x7x7x768xf32, 3xi64) + reshape_183 = paddle._C_ops.reshape(roll_11, stack_80) + del roll_11, stack_80 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, -1x49x768xf32) + add_88 = paddle._C_ops.add(add_83, reshape_183) + del add_83, reshape_183 + + # pd_op.layer_norm: (-1x49x768xf32, -1x49xf32, -1x49xf32) <- (-1x49x768xf32, 768xf32, 768xf32) + layer_norm_81, layer_norm_82, layer_norm_83 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_88, parameter_9, parameter_8, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del parameter_8, parameter_9 + + # pd_op.matmul: (-1x49x3072xf32) <- (-1x49x768xf32, 768x3072xf32) + matmul_73 = paddle._C_ops.matmul(layer_norm_81, parameter_7, False, False) + del layer_norm_81, parameter_7 + + # pd_op.add: (-1x49x3072xf32) <- (-1x49x3072xf32, 3072xf32) + add_89 = paddle._C_ops.add(matmul_73, parameter_6) + del matmul_73, parameter_6 + + # pd_op.gelu: (-1x49x3072xf32) <- (-1x49x3072xf32) + gelu_11 = paddle._C_ops.gelu(add_89, False) + del add_89 + + # pd_op.matmul: (-1x49x768xf32) <- (-1x49x3072xf32, 3072x768xf32) + matmul_74 = paddle._C_ops.matmul(gelu_11, parameter_5, False, False) + del gelu_11, parameter_5 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, 768xf32) + add_90 = paddle._C_ops.add(matmul_74, parameter_4) + del matmul_74, parameter_4 + + # pd_op.add: (-1x49x768xf32) <- (-1x49x768xf32, -1x49x768xf32) + add_91 = paddle._C_ops.add(add_88, add_90) + del add_88, add_90 + + # pd_op.layer_norm: (-1x49x768xf32, -1x49xf32, -1x49xf32) <- (-1x49x768xf32, 768xf32, 768xf32) + layer_norm_84, layer_norm_85, layer_norm_86 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_91, parameter_3, parameter_2, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_91, parameter_2, parameter_3 + + # pd_op.transpose: (-1x768x49xf32) <- (-1x49x768xf32) + transpose_79 = paddle._C_ops.transpose(layer_norm_84, [0, 2, 1]) + del layer_norm_84 + + # pd_op.unsqueeze: (-1x768x1x49xf32) <- (-1x768x49xf32, 1xi64) + unsqueeze_36 = paddle._C_ops.unsqueeze(transpose_79, full_int_array_5) + del transpose_79 + + # pd_op.pool2d: (-1x768x1x1xf32) <- (-1x768x1x49xf32, 2xi64) + pool2d_0 = paddle._C_ops.pool2d( + unsqueeze_36, + full_int_array_14, + [1, 1], + [0, 0], + False, + True, + "NCHW", + "avg", + False, + True, + "EXPLICIT", + ) + del full_int_array_14, unsqueeze_36 + + # pd_op.squeeze: (-1x768x1xf32) <- (-1x768x1x1xf32, 1xi64) + squeeze_0 = paddle._C_ops.squeeze(pool2d_0, full_int_array_5) + del full_int_array_5, pool2d_0 + + # pd_op.flatten: (-1x768xf32) <- (-1x768x1xf32) + flatten_1 = paddle._C_ops.flatten(squeeze_0, 1, 2) + del squeeze_0 + + # pd_op.matmul: (-1x102xf32) <- (-1x768xf32, 768x102xf32) + matmul_75 = paddle._C_ops.matmul(flatten_1, parameter_1, False, False) + del flatten_1, parameter_1 + + # pd_op.add: (-1x102xf32) <- (-1x102xf32, 102xf32) + add_0 = paddle._C_ops.add(matmul_75, parameter_0) + del matmul_75, parameter_0 + + return ( + add_0, + set_value__0, + set_value__1, + set_value__2, + set_value__3, + set_value__4, + set_value__5, + ) diff --git a/paddle_samples/vision-model/SwinTransformer_tiny_patch4_window7_224/subgraph_2/weight_meta.py b/paddle_samples/vision-model/SwinTransformer_tiny_patch4_window7_224/subgraph_2/weight_meta.py new file mode 100644 index 00000000..fb09e5cb --- /dev/null +++ b/paddle_samples/vision-model/SwinTransformer_tiny_patch4_window7_224/subgraph_2/weight_meta.py @@ -0,0 +1,1447 @@ +class Program_weight_tensor_parameter_0: + name = "parameter_0" + shape = [102] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_1: + name = "parameter_1" + shape = [768, 102] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_2: + name = "parameter_2" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_3: + name = "parameter_3" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_4: + name = "parameter_4" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_5: + name = "parameter_5" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_6: + name = "parameter_6" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_7: + name = "parameter_7" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_8: + name = "parameter_8" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_9: + name = "parameter_9" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_10: + name = "parameter_10" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_11: + name = "parameter_11" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_12: + name = "parameter_12" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_13: + name = "parameter_13" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_14: + name = "parameter_14" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_15: + name = "parameter_15" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_16: + name = "parameter_16" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_17: + name = "parameter_17" + shape = [3072, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_18: + name = "parameter_18" + shape = [3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_19: + name = "parameter_19" + shape = [768, 3072] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_20: + name = "parameter_20" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_21: + name = "parameter_21" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_22: + name = "parameter_22" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_23: + name = "parameter_23" + shape = [768, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_24: + name = "parameter_24" + shape = [2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_25: + name = "parameter_25" + shape = [768, 2304] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_26: + name = "parameter_26" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_27: + name = "parameter_27" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_28: + name = "parameter_28" + shape = [1536, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_29: + name = "parameter_29" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_30: + name = "parameter_30" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_31: + name = "parameter_31" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_32: + name = "parameter_32" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_33: + name = "parameter_33" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_34: + name = "parameter_34" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_35: + name = "parameter_35" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_36: + name = "parameter_36" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_37: + name = "parameter_37" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_38: + name = "parameter_38" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_39: + name = "parameter_39" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_40: + name = "parameter_40" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_41: + name = "parameter_41" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_42: + name = "parameter_42" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_43: + name = "parameter_43" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_44: + name = "parameter_44" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_45: + name = "parameter_45" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_46: + name = "parameter_46" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_47: + name = "parameter_47" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_48: + name = "parameter_48" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_49: + name = "parameter_49" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_50: + name = "parameter_50" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_51: + name = "parameter_51" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_52: + name = "parameter_52" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_53: + name = "parameter_53" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_54: + name = "parameter_54" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_55: + name = "parameter_55" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_56: + name = "parameter_56" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_57: + name = "parameter_57" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_58: + name = "parameter_58" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_59: + name = "parameter_59" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_60: + name = "parameter_60" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_61: + name = "parameter_61" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_62: + name = "parameter_62" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_63: + name = "parameter_63" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_64: + name = "parameter_64" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_65: + name = "parameter_65" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_66: + name = "parameter_66" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_67: + name = "parameter_67" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_68: + name = "parameter_68" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_69: + name = "parameter_69" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_70: + name = "parameter_70" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_71: + name = "parameter_71" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_72: + name = "parameter_72" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_73: + name = "parameter_73" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_74: + name = "parameter_74" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_75: + name = "parameter_75" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_76: + name = "parameter_76" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_77: + name = "parameter_77" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_78: + name = "parameter_78" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_79: + name = "parameter_79" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_80: + name = "parameter_80" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_81: + name = "parameter_81" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_82: + name = "parameter_82" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_83: + name = "parameter_83" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_84: + name = "parameter_84" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_85: + name = "parameter_85" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_86: + name = "parameter_86" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_87: + name = "parameter_87" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_88: + name = "parameter_88" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_89: + name = "parameter_89" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_90: + name = "parameter_90" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_91: + name = "parameter_91" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_92: + name = "parameter_92" + shape = [1536, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_93: + name = "parameter_93" + shape = [1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_94: + name = "parameter_94" + shape = [384, 1536] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_95: + name = "parameter_95" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_96: + name = "parameter_96" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_97: + name = "parameter_97" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_98: + name = "parameter_98" + shape = [384, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_99: + name = "parameter_99" + shape = [1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_100: + name = "parameter_100" + shape = [384, 1152] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_101: + name = "parameter_101" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_102: + name = "parameter_102" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_103: + name = "parameter_103" + shape = [768, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_104: + name = "parameter_104" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_105: + name = "parameter_105" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_106: + name = "parameter_106" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_107: + name = "parameter_107" + shape = [768, 192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_108: + name = "parameter_108" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_109: + name = "parameter_109" + shape = [192, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_110: + name = "parameter_110" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_111: + name = "parameter_111" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_112: + name = "parameter_112" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_113: + name = "parameter_113" + shape = [192, 192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_114: + name = "parameter_114" + shape = [576] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_115: + name = "parameter_115" + shape = [192, 576] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_116: + name = "parameter_116" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_117: + name = "parameter_117" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_118: + name = "parameter_118" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_119: + name = "parameter_119" + shape = [768, 192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_120: + name = "parameter_120" + shape = [768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_121: + name = "parameter_121" + shape = [192, 768] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_122: + name = "parameter_122" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_123: + name = "parameter_123" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_124: + name = "parameter_124" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_125: + name = "parameter_125" + shape = [192, 192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_126: + name = "parameter_126" + shape = [576] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_127: + name = "parameter_127" + shape = [192, 576] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_128: + name = "parameter_128" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_129: + name = "parameter_129" + shape = [192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_130: + name = "parameter_130" + shape = [384, 192] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_131: + name = "parameter_131" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_132: + name = "parameter_132" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_133: + name = "parameter_133" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_134: + name = "parameter_134" + shape = [384, 96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_135: + name = "parameter_135" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_136: + name = "parameter_136" + shape = [96, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_137: + name = "parameter_137" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_138: + name = "parameter_138" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_139: + name = "parameter_139" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_140: + name = "parameter_140" + shape = [96, 96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_141: + name = "parameter_141" + shape = [288] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_142: + name = "parameter_142" + shape = [96, 288] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_143: + name = "parameter_143" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_144: + name = "parameter_144" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_145: + name = "parameter_145" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_146: + name = "parameter_146" + shape = [384, 96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_147: + name = "parameter_147" + shape = [384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_148: + name = "parameter_148" + shape = [96, 384] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_149: + name = "parameter_149" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_150: + name = "parameter_150" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_151: + name = "parameter_151" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_152: + name = "parameter_152" + shape = [96, 96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_153: + name = "parameter_153" + shape = [288] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_154: + name = "parameter_154" + shape = [96, 288] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_155: + name = "parameter_155" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_156: + name = "parameter_156" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_157: + name = "parameter_157" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_158: + name = "parameter_158" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_159: + name = "parameter_159" + shape = [96] + dtype = "float32" + low = 0 + high = 0.5 + data = None + + +class Program_weight_tensor_parameter_160: + name = "parameter_160" + shape = [96, 3, 4, 4] + dtype = "float32" + low = 0 + high = 0.5 + data = None